super-intel.c

   1 /*
   2  * mdadm - Intel(R) Matrix Storage Manager Support
   3  *
   4  * Copyright (C) 2002-2008 Intel Corporation
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along with
  16  * this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  18  */
  19
  20 #define HAVE_STDINT_H 1
  21 #include "mdadm.h"
  22 #include "mdmon.h"
  23 #include "dlink.h"
  24 #include "sha1.h"
  25 #include "platform-intel.h"
  26 #include <values.h>
  27 #include <scsi/sg.h>
  28 #include <ctype.h>
  29 #include <dirent.h>
  30
  31 /* MPB == Metadata Parameter Block */
  32 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
  33 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
  34 #define MPB_VERSION_RAID0 "1.0.00"
  35 #define MPB_VERSION_RAID1 "1.1.00"
  36 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
  37 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
  38 #define MPB_VERSION_RAID5 "1.2.02"
  39 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
  40 #define MPB_VERSION_CNG "1.2.06"
  41 #define MPB_VERSION_ATTRIBS "1.3.00"
  42 #define MAX_SIGNATURE_LENGTH  32
  43 #define MAX_RAID_SERIAL_LEN   16
  44
  45 /* supports RAID0 */
  46 #define MPB_ATTRIB_RAID0                __cpu_to_le32(0x00000001)
  47 /* supports RAID1 */
  48 #define MPB_ATTRIB_RAID1                __cpu_to_le32(0x00000002)
  49 /* supports RAID10 */
  50 #define MPB_ATTRIB_RAID10               __cpu_to_le32(0x00000004)
  51 /* supports RAID1E */
  52 #define MPB_ATTRIB_RAID1E               __cpu_to_le32(0x00000008)
  53 /* supports RAID5 */
  54 #define MPB_ATTRIB_RAID5                __cpu_to_le32(0x00000010)
  55 /* supports RAID CNG */
  56 #define MPB_ATTRIB_RAIDCNG              __cpu_to_le32(0x00000020)
  57 /* supports expanded stripe sizes of  256K, 512K and 1MB */
  58 #define MPB_ATTRIB_EXP_STRIPE_SIZE      __cpu_to_le32(0x00000040)
  59
  60 /* The OROM Support RST Caching of Volumes */
  61 #define MPB_ATTRIB_NVM                  __cpu_to_le32(0x02000000)
  62 /* The OROM supports creating disks greater than 2TB */
  63 #define MPB_ATTRIB_2TB_DISK             __cpu_to_le32(0x04000000)
  64 /* The OROM supports Bad Block Management */
  65 #define MPB_ATTRIB_BBM                  __cpu_to_le32(0x08000000)
  66
  67 /* The OROM Supports NVM Caching of Volumes */
  68 #define MPB_ATTRIB_NEVER_USE2           __cpu_to_le32(0x10000000)
  69 /* The OROM supports creating volumes greater than 2TB */
  70 #define MPB_ATTRIB_2TB                  __cpu_to_le32(0x20000000)
  71 /* originally for PMP, now it's wasted b/c. Never use this bit! */
  72 #define MPB_ATTRIB_NEVER_USE            __cpu_to_le32(0x40000000)
  73 /* Verify MPB contents against checksum after reading MPB */
  74 #define MPB_ATTRIB_CHECKSUM_VERIFY      __cpu_to_le32(0x80000000)
  75
  76 /* Define all supported attributes that have to be accepted by mdadm
  77  */
  78 #define MPB_ATTRIB_SUPPORTED           (MPB_ATTRIB_CHECKSUM_VERIFY | \
  79                                         MPB_ATTRIB_2TB             | \
  80                                         MPB_ATTRIB_2TB_DISK        | \
  81                                         MPB_ATTRIB_RAID0           | \
  82                                         MPB_ATTRIB_RAID1           | \
  83                                         MPB_ATTRIB_RAID10          | \
  84                                         MPB_ATTRIB_RAID5           | \
  85                                         MPB_ATTRIB_EXP_STRIPE_SIZE | \
  86                                         MPB_ATTRIB_BBM)
  87
  88 /* Define attributes that are unused but not harmful */
  89 #define MPB_ATTRIB_IGNORED              (MPB_ATTRIB_NEVER_USE)
  90
  91 #define MPB_SECTOR_CNT 2210
  92 #define IMSM_RESERVED_SECTORS 8192
  93 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2048
  94 #define SECT_PER_MB_SHIFT 11
  95 #define MAX_SECTOR_SIZE 4096
  96 #define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
  97                                                    * multiple PPL area
  98                                                    */
  99
 100 /*
 101  * Internal Write-intent bitmap is stored in the same area where PPL.
 102  * Both features are mutually exclusive, so it is not an issue.
 103  * The first 8KiB of the area are reserved and shall not be used.
 104  */
 105 #define IMSM_BITMAP_AREA_RESERVED_SIZE 8192
 106
 107 #define IMSM_BITMAP_HEADER_OFFSET (IMSM_BITMAP_AREA_RESERVED_SIZE)
 108 #define IMSM_BITMAP_HEADER_SIZE MAX_SECTOR_SIZE
 109
 110 #define IMSM_BITMAP_START_OFFSET (IMSM_BITMAP_HEADER_OFFSET + IMSM_BITMAP_HEADER_SIZE)
 111 #define IMSM_BITMAP_AREA_SIZE (MULTIPLE_PPL_AREA_SIZE_IMSM - IMSM_BITMAP_START_OFFSET)
 112 #define IMSM_BITMAP_AND_HEADER_SIZE (IMSM_BITMAP_AREA_SIZE + IMSM_BITMAP_HEADER_SIZE)
 113
 114 #define IMSM_DEFAULT_BITMAP_CHUNKSIZE (64 * 1024 * 1024)
 115 #define IMSM_DEFAULT_BITMAP_DAEMON_SLEEP 5
 116
 117 /*
 118  * This macro lets us ensure that no-one accidentally
 119  * changes the size of a struct
 120  */
 121 #define ASSERT_SIZE(_struct, size) \
 122 static inline void __assert_size_##_struct(void)        \
 123 {                                                       \
 124         switch (0) {                                    \
 125         case 0: break;                                  \
 126         case (sizeof(struct _struct) == size): break;   \
 127         }                                               \
 128 }
 129
 130 /* Disk configuration info. Offsets are relative to the start of the MPB, for the first disk entry (disk[0] at 0xD8). */
 131 #define IMSM_MAX_DEVICES 255
 132 struct imsm_disk {
 133         __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
 134         __u32 total_blocks_lo;           /* 0xE8 - 0xEB total blocks lo */
 135         __u32 scsi_id;                   /* 0xEC - 0xEF scsi ID */
 136 #define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
 137 #define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
 138 #define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
 139 #define JOURNAL_DISK    __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
 140         __u32 status;                    /* 0xF0 - 0xF3 */
 141         __u32 owner_cfg_num; /* 0xF4 - 0xF7 which config 0,1,2... owns this disk */
 142         __u32 total_blocks_hi;           /* 0xF8 - 0xFB total blocks hi */
 143 #define IMSM_DISK_FILLERS       3
 144         __u32 filler[IMSM_DISK_FILLERS]; /* 0xFC - 0x107 MPB_DISK_FILLERS for future expansion */
 145 };
 146 ASSERT_SIZE(imsm_disk, 48)
 147
 148 /* map selector for map management
 149  */
 150 #define MAP_0           0
 151 #define MAP_1           1
 152 #define MAP_X           -1
 153
 154 /* RAID map configuration infos. */
 155 struct imsm_map {
 156         __u32 pba_of_lba0_lo;   /* start address of partition */
 157         __u32 blocks_per_member_lo;/* blocks per member */
 158         __u32 num_data_stripes_lo;      /* number of data stripes */
 159         __u16 blocks_per_strip;
 160         __u8  map_state;        /* Normal, Uninitialized, Degraded, Failed */
 161 #define IMSM_T_STATE_NORMAL 0
 162 #define IMSM_T_STATE_UNINITIALIZED 1
 163 #define IMSM_T_STATE_DEGRADED 2
 164 #define IMSM_T_STATE_FAILED 3
 165         __u8  raid_level;
 166 #define IMSM_T_RAID0 0
 167 #define IMSM_T_RAID1 1
 168 #define IMSM_T_RAID5 5          /* since metadata version 1.2.02 ? */
 169         __u8  num_members;      /* number of member disks */
 170         __u8  num_domains;      /* number of parity domains */
 171         __u8  failed_disk_num;  /* valid only when state is degraded */
 172         __u8  ddf;
 173         __u32 pba_of_lba0_hi;
 174         __u32 blocks_per_member_hi;
 175         __u32 num_data_stripes_hi;
 176         __u32 filler[4];        /* expansion area */
 177 #define IMSM_ORD_REBUILD (1 << 24)
 178         __u32 disk_ord_tbl[1];  /* disk_ord_tbl[num_members],
 179                                  * top byte contains some flags
 180                                  */
 181 };
 182 ASSERT_SIZE(imsm_map, 52)
 183
 184 struct imsm_vol {
 185         __u32 curr_migr_unit_lo;
 186         __u32 checkpoint_id;    /* id to access curr_migr_unit */
 187         __u8  migr_state;       /* Normal or Migrating */
 188 #define MIGR_INIT 0
 189 #define MIGR_REBUILD 1
 190 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
 191 #define MIGR_GEN_MIGR 3
 192 #define MIGR_STATE_CHANGE 4
 193 #define MIGR_REPAIR 5 /* stored as MIGR_VERIFY plus DEV_VERIFY_AND_FIX, see set_migr_type() */
 194         __u8  migr_type;        /* Initializing, Rebuilding, ... */
 195 #define RAIDVOL_CLEAN          0
 196 #define RAIDVOL_DIRTY          1
 197 #define RAIDVOL_DSRECORD_VALID 2
 198         __u8  dirty;
 199         __u8  fs_state;         /* fast-sync state for CnG (0xff == disabled) */
 200         __u16 verify_errors;    /* number of mismatches */
 201         __u16 bad_blocks;       /* number of bad blocks during verify */
 202         __u32 curr_migr_unit_hi;
 203         __u32 filler[3];
 204         struct imsm_map map[1];
 205         /* here comes another one if migr_state */
 206 };
 207 ASSERT_SIZE(imsm_vol, 84)
 208
 209 struct imsm_dev {
 210         __u8  volume[MAX_RAID_SERIAL_LEN];
 211         __u32 size_low;
 212         __u32 size_high;
 213 #define DEV_BOOTABLE            __cpu_to_le32(0x01)
 214 #define DEV_BOOT_DEVICE         __cpu_to_le32(0x02)
 215 #define DEV_READ_COALESCING     __cpu_to_le32(0x04)
 216 #define DEV_WRITE_COALESCING    __cpu_to_le32(0x08)
 217 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
 218 #define DEV_HIDDEN_AT_BOOT      __cpu_to_le32(0x20)
 219 #define DEV_CURRENTLY_HIDDEN    __cpu_to_le32(0x40)
 220 #define DEV_VERIFY_AND_FIX      __cpu_to_le32(0x80)
 221 #define DEV_MAP_STATE_UNINIT    __cpu_to_le32(0x100)
 222 #define DEV_NO_AUTO_RECOVERY    __cpu_to_le32(0x200)
 223 #define DEV_CLONE_N_GO          __cpu_to_le32(0x400)
 224 #define DEV_CLONE_MAN_SYNC      __cpu_to_le32(0x800)
 225 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
 226         __u32 status;   /* Persistent RaidDev status */
 227         __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
 228         __u8  migr_priority;
 229         __u8  num_sub_vols;
 230         __u8  tid;
 231         __u8  cng_master_disk;
 232         __u16 cache_policy;
 233         __u8  cng_state;
 234         __u8  cng_sub_state;
 235         __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */
 236
 237         /* NVM_EN */
 238         __u8 nv_cache_mode;
 239         __u8 nv_cache_flags;
 240
 241         /* Unique Volume Id of the NvCache Volume associated with this volume */
 242         __u32 nvc_vol_orig_family_num;
 243         __u16 nvc_vol_raid_dev_num;
 244
 245 #define RWH_OFF 0
 246 #define RWH_DISTRIBUTED 1
 247 #define RWH_JOURNALING_DRIVE 2
 248 #define RWH_MULTIPLE_DISTRIBUTED 3
 249 #define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
 250 #define RWH_MULTIPLE_OFF 5
 251 #define RWH_BITMAP 6
 252         __u8  rwh_policy; /* Raid Write Hole Policy */
 253         __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
 254         __u8  filler1;
 255
 256 #define IMSM_DEV_FILLERS 3
 257         __u32 filler[IMSM_DEV_FILLERS];
 258         struct imsm_vol vol;
 259 };
 260 ASSERT_SIZE(imsm_dev, 164)
 261
 262 struct imsm_super {
 263         __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
 264         __u32 check_sum;                /* 0x20 - 0x23 MPB Checksum */
 265         __u32 mpb_size;                 /* 0x24 - 0x27 Size of MPB */
 266         __u32 family_num;               /* 0x28 - 0x2B Checksum from first time this config was written */
 267         __u32 generation_num;           /* 0x2C - 0x2F Incremented each time this array's MPB is written */
 268         __u32 error_log_size;           /* 0x30 - 0x33 in bytes */
 269         __u32 attributes;               /* 0x34 - 0x37 */
 270         __u8 num_disks;                 /* 0x38 Number of configured disks */
 271         __u8 num_raid_devs;             /* 0x39 Number of configured volumes */
 272         __u8 error_log_pos;             /* 0x3A  */
 273         __u8 fill[1];                   /* 0x3B */
 274         __u32 cache_size;               /* 0x3C - 0x3F in mb */
 275         __u32 orig_family_num;          /* 0x40 - 0x43 original family num */
 276         __u32 pwr_cycle_count;          /* 0x44 - 0x47 simulated power cycle count for array */
 277         __u32 bbm_log_size;             /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
 278         __u16 num_raid_devs_created;    /* 0x4C - 0x4D Used for generating unique
 279                                          * volume IDs for raid_dev created in this array
 280                                          * (starts at 1)
 281                                          */
 282         __u16 filler1;                  /* 0x4E - 0x4F */
 283         __u64 creation_time;            /* 0x50 - 0x57 Array creation time */
 284 #define IMSM_FILLERS 32
 285         __u32 filler[IMSM_FILLERS];     /* 0x58 - 0xD7 RAID_MPB_FILLERS */
 286         struct imsm_disk disk[1];       /* 0xD8 diskTbl[numDisks] */
 287         /* here comes imsm_dev[num_raid_devs] */
 288         /* here comes BBM logs */
 289 };
 290 ASSERT_SIZE(imsm_super, 264)
 291
 292 #define BBM_LOG_MAX_ENTRIES 254
 293 #define BBM_LOG_MAX_LBA_ENTRY_VAL 256           /* Represents 256 LBAs */
 294 #define BBM_LOG_SIGNATURE 0xabadb10c
 295
 296 struct bbm_log_block_addr {
 297         __u16 w1;
 298         __u32 dw1;
 299 } __attribute__ ((__packed__));
 300
 301 struct bbm_log_entry {
 302         __u8 marked_count;              /* Number of blocks marked - 1 */
 303         __u8 disk_ordinal;              /* Disk entry within the imsm_super */
 304         struct bbm_log_block_addr defective_block_start;
 305 } __attribute__ ((__packed__));
 306
 307 struct bbm_log {
 308         __u32 signature; /* 0xABADB10C */
 309         __u32 entry_count;
 310         struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
 311 };
 312 ASSERT_SIZE(bbm_log, 2040)
 313
 314 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
 315
 316 #define BLOCKS_PER_KB   (1024/512)
 317
 318 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
 319
 320 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
 321
 322 #define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */
 323 #define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk,
 324                                * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POS
 325                                */
 326
 327 #define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
 328                                  *  be recovered using srcMap */
 329 #define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
 330                                  *  already been migrated and must
 331                                  *  be recovered from checkpoint area */
 332
 333 #define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */
 334
 335 struct migr_record {
 336         __u32 rec_status;           /* Status used to determine how to restart
 337                                      * migration in case it aborts
 338                                      * in some fashion */
 339         __u32 curr_migr_unit_lo;    /* 0..numMigrUnits-1 */
 340         __u32 family_num;           /* Family number of MPB
 341                                      * containing the RaidDev
 342                                      * that is migrating */
 343         __u32 ascending_migr;       /* True if migrating in increasing
 344                                      * order of lbas */
 345         __u32 blocks_per_unit;      /* Num disk blocks per unit of operation */
 346         __u32 dest_depth_per_unit;  /* Num member blocks each destMap
 347                                      * member disk
 348                                      * advances per unit-of-operation */
 349         __u32 ckpt_area_pba_lo;     /* Pba of first block of ckpt copy area */
 350         __u32 dest_1st_member_lba_lo;   /* First member lba on first
 351                                          * stripe of destination */
 352         __u32 num_migr_units_lo;    /* Total num migration units-of-op */
 353         __u32 post_migr_vol_cap;    /* Size of volume after
 354                                      * migration completes */
 355         __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
 356         __u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
 357                                      * migration ckpt record was read from
 358                                      * (for recovered migrations) */
 359         __u32 curr_migr_unit_hi;    /* 0..numMigrUnits-1 high order 32 bits */
 360         __u32 ckpt_area_pba_hi;     /* Pba of first block of ckpt copy area
 361                                      * high order 32 bits */
 362         __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of
 363                                        * destination - high order 32 bits */
 364         __u32 num_migr_units_hi;      /* Total num migration units-of-op
 365                                        * high order 32 bits */
 366         __u32 filler[16];
 367 };
 368 ASSERT_SIZE(migr_record, 128)
 369
 370 /**
 371  * enum imsm_status - internal IMSM return values representation.
 372  * @IMSM_STATUS_OK: function succeeded.
 373  * @IMSM_STATUS_ERROR: General error occurred (not specified).
 374  *
 375  * Typedefed to imsm_status_t.
 376  */
 377 typedef enum imsm_status {
 378         IMSM_STATUS_ERROR = -1,
 379         IMSM_STATUS_OK = 0,
 380 } imsm_status_t;
 381
 382 struct md_list {
 383         /* usage marker:
 384          *  1: load metadata
 385          *  2: metadata does not match
 386          *  4: already checked
 387          */
 388         int   used;
 389         char  *devname;
 390         int   found;
 391         int   container;
 392         dev_t st_rdev;
 393         struct md_list *next;
 394 };
 395
 396 #define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
 397
 398 static __u8 migr_type(struct imsm_dev *dev)
 399 {
 400         if (dev->vol.migr_type == MIGR_VERIFY &&
 401             dev->status & DEV_VERIFY_AND_FIX)
 402                 return MIGR_REPAIR;
 403         else
 404                 return dev->vol.migr_type;
 405 }
 406
 407 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
 408 {
 409         /* for compatibility with older oroms convert MIGR_REPAIR, into
 410          * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
 411          */
 412         if (migr_type == MIGR_REPAIR) {
 413                 dev->vol.migr_type = MIGR_VERIFY;
 414                 dev->status |= DEV_VERIFY_AND_FIX;
 415         } else {
 416                 dev->vol.migr_type = migr_type;
 417                 dev->status &= ~DEV_VERIFY_AND_FIX;
 418         }
 419 }
 420
 421 static unsigned int sector_count(__u32 bytes, unsigned int sector_size)
 422 {
 423         return ROUND_UP(bytes, sector_size) / sector_size;
 424 }
 425
 426 static unsigned int mpb_sectors(struct imsm_super *mpb,
 427                                         unsigned int sector_size)
 428 {
 429         return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size);
 430 }
 431
 432 struct intel_dev {
 433         struct imsm_dev *dev;
 434         struct intel_dev *next;
 435         unsigned index;
 436 };
 437
 438 struct intel_hba {
 439         enum sys_dev_type type; /* copied from the sys_dev by alloc_intel_hba() */
 440         char *path; /* xstrdup()'d copy of the sys_dev path */
 441         char *pci_id; /* points into path, just past its last '/' (NULL if path has none) */
 442         struct intel_hba *next; /* next entry of the singly linked list, NULL at the end */
 443 };
 444
 445 enum action {
 446         DISK_REMOVE = 1,
 447         DISK_ADD
 448 };
 449 /* internal representation of IMSM metadata */
 450 struct intel_super {
 451         union {
 452                 void *buf; /* O_DIRECT buffer for reading/writing metadata */
 453                 struct imsm_super *anchor; /* immovable parameters */
 454         };
 455         union {
 456                 void *migr_rec_buf; /* buffer for I/O operations */
 457                 struct migr_record *migr_rec; /* migration record */
 458         };
 459         int clean_migration_record_by_mdmon; /* when reshape is switched to next
 460                 array, it indicates that mdmon is allowed to clean migration
 461                 record */
 462         size_t len; /* size of the 'buf' allocation */
 463         size_t extra_space; /* extra space in 'buf' that is not used yet */
 464         void *next_buf; /* for realloc'ing buf from the manager */
 465         size_t next_len;
 466         int updates_pending; /* count of pending updates for mdmon */
 467         int current_vol; /* index of raid device undergoing creation */
 468         unsigned long long create_offset; /* common start for 'current_vol' */
 469         __u32 random; /* random data for seeding new family numbers */
 470         struct intel_dev *devlist;
 471         unsigned int sector_size; /* sector size of used member drives */
 472         struct dl {
 473                 struct dl *next;
 474                 int index;
 475                 __u8 serial[MAX_RAID_SERIAL_LEN];
 476                 int major, minor;
 477                 char *devname;
 478                 struct imsm_disk disk;
 479                 int fd;
 480                 int extent_cnt;
 481                 struct extent *e; /* for determining freespace @ create */
 482                 int raiddisk; /* slot to fill in autolayout */
 483                 enum action action;
 484         } *disks, *current_disk;
 485         struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
 486                                       active */
 487         struct dl *missing; /* disks removed while we weren't looking */
 488         struct bbm_log *bbm_log;
 489         struct intel_hba *hba; /* device path of the raid controller for this metadata */
 490         const struct imsm_orom *orom; /* platform firmware support */
 491         struct intel_super *next; /* (temp) list for disambiguating family_num */
 492         struct md_bb bb;        /* memory for get_bad_blocks call */
 493 };
 494
 495 struct intel_disk {
 496         struct imsm_disk disk;
 497         #define IMSM_UNKNOWN_OWNER (-1)
 498         int owner;
 499         struct intel_disk *next;
 500 };
 501
 502 /**
 503  * struct extent - reserved space details.
 504  * @start: start offset.
 505  * @size: size of reservation, set to 0 for metadata reservation.
 506  * @vol: index of the volume, meaningful if &size is set.
 507  */
 508 struct extent {
 509         unsigned long long start, size;
 510         int vol;
 511 };
 512
 513 /* definitions of reshape process types */
 514 enum imsm_reshape_type {
 515         CH_TAKEOVER,
 516         CH_MIGRATION,
 517         CH_ARRAY_SIZE,
 518 };
 519
 520 /* definition of messages passed to imsm_process_update */
 521 enum imsm_update_type {
 522         update_activate_spare,
 523         update_create_array,
 524         update_kill_array,
 525         update_rename_array,
 526         update_add_remove_disk,
 527         update_reshape_container_disks,
 528         update_reshape_migration,
 529         update_takeover,
 530         update_general_migration_checkpoint,
 531         update_size_change,
 532         update_prealloc_badblocks_mem,
 533         update_rwh_policy,
 534 };
 535
 536 struct imsm_update_activate_spare {
 537         enum imsm_update_type type;
 538         struct dl *dl;
 539         int slot;
 540         int array;
 541         struct imsm_update_activate_spare *next;
 542 };
 543
 544 struct geo_params {
 545         char devnm[32];
 546         char *dev_name;
 547         unsigned long long size;
 548         int level;
 549         int layout;
 550         int chunksize;
 551         int raid_disks;
 552 };
 553
 554 enum takeover_direction {
 555         R10_TO_R0,
 556         R0_TO_R10
 557 };
 558 struct imsm_update_takeover {
 559         enum imsm_update_type type;
 560         int subarray;
 561         enum takeover_direction direction;
 562 };
 563
 564 struct imsm_update_reshape {
 565         enum imsm_update_type type;
 566         int old_raid_disks;
 567         int new_raid_disks;
 568
 569         int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
 570 };
 571
 572 struct imsm_update_reshape_migration {
 573         enum imsm_update_type type;
 574         int old_raid_disks;
 575         int new_raid_disks;
 576         /* fields for array migration changes
 577          */
 578         int subdev;
 579         int new_level;
 580         int new_layout;
 581         int new_chunksize;
 582
 583         int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
 584 };
 585
 586 struct imsm_update_size_change {
 587         enum imsm_update_type type;
 588         int subdev;
 589         long long new_size;
 590 };
 591
 592 struct imsm_update_general_migration_checkpoint {
 593         enum imsm_update_type type;
 594         __u64 curr_migr_unit;
 595 };
 596
 597 struct disk_info {
 598         __u8 serial[MAX_RAID_SERIAL_LEN];
 599 };
 600
 601 struct imsm_update_create_array {
 602         enum imsm_update_type type;
 603         int dev_idx;
 604         struct imsm_dev dev;
 605 };
 606
 607 struct imsm_update_kill_array {
 608         enum imsm_update_type type;
 609         int dev_idx;
 610 };
 611
 612 struct imsm_update_rename_array {
 613         enum imsm_update_type type;
 614         __u8 name[MAX_RAID_SERIAL_LEN];
 615         int dev_idx;
 616 };
 617
 618 struct imsm_update_add_remove_disk {
 619         enum imsm_update_type type;
 620 };
 621
 622 struct imsm_update_prealloc_bb_mem {
 623         enum imsm_update_type type;
 624 };
 625
 626 struct imsm_update_rwh_policy {
 627         enum imsm_update_type type;
 628         int new_policy;
 629         int dev_idx;
 630 };
 631
 632 static const char *_sys_dev_type[] = {
 633         [SYS_DEV_UNKNOWN] = "Unknown",
 634         [SYS_DEV_SAS] = "SAS",
 635         [SYS_DEV_SATA] = "SATA",
 636         [SYS_DEV_NVME] = "NVMe",
 637         [SYS_DEV_VMD] = "VMD",
 638         [SYS_DEV_SATA_VMD] = "SATA VMD"
 639 };
 640
 641 static int no_platform = -1;
 642
 643 static int check_no_platform(void)
 644 {
 645         static const char search[] = "mdadm.imsm.test=1";
 646         FILE *fp;
 647
 648         if (no_platform >= 0)
 649                 return no_platform;
 650
 651         if (check_env("IMSM_NO_PLATFORM")) {
 652                 no_platform = 1;
 653                 return 1;
 654         }
 655         fp = fopen("/proc/cmdline", "r");
 656         if (fp) {
 657                 char *l = conf_line(fp);
 658                 char *w = l;
 659
 660                 if (l == NULL) {
 661                         fclose(fp);
 662                         return 0;
 663                 }
 664
 665                 do {
 666                         if (strcmp(w, search) == 0)
 667                                 no_platform = 1;
 668                         w = dl_next(w);
 669                 } while (w != l);
 670                 free_line(l);
 671                 fclose(fp);
 672                 if (no_platform >= 0)
 673                         return no_platform;
 674         }
 675         no_platform = 0;
 676         return 0;
 677 }
 678
 679 void imsm_set_no_platform(int v)
 680 {
 681         no_platform = v;
 682 }
 683
 684 const char *get_sys_dev_type(enum sys_dev_type type)
 685 {
 686         if (type >= SYS_DEV_MAX)
 687                 type = SYS_DEV_UNKNOWN;
 688
 689         return _sys_dev_type[type];
 690 }
 691
 692 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
 693 {
 694         struct intel_hba *result = xmalloc(sizeof(*result));
 695
 696         result->type = device->type;
 697         result->path = xstrdup(device->path);
 698         result->next = NULL;
 699         if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
 700                 result->pci_id++;
 701
 702         return result;
 703 }
 704
 705 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
 706 {
 707         struct intel_hba *result;
 708
 709         for (result = hba; result; result = result->next) {
 710                 if (result->type == device->type && strcmp(result->path, device->path) == 0)
 711                         break;
 712         }
 713         return result;
 714 }
 715
 716 static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
 717 {
 718         struct intel_hba *hba;
 719
 720         /* check if disk attached to Intel HBA */
 721         hba = find_intel_hba(super->hba, device);
 722         if (hba != NULL)
 723                 return 1;
 724         /* Check if HBA is already attached to super */
 725         if (super->hba == NULL) {
 726                 super->hba = alloc_intel_hba(device);
 727                 return 1;
 728         }
 729
 730         hba = super->hba;
 731         /* Intel metadata allows for all disks attached to the same type HBA.
 732          * Do not support HBA types mixing
 733          */
 734         if (device->type != hba->type)
 735                 return 2;
 736
 737         /* Multiple same type HBAs can be used if they share the same OROM */
 738         const struct imsm_orom *device_orom = get_orom_by_device_id(device->dev_id);
 739
 740         if (device_orom != super->orom)
 741                 return 2;
 742
 743         while (hba->next)
 744                 hba = hba->next;
 745
 746         hba->next = alloc_intel_hba(device);
 747         return 1;
 748 }
 749
 750 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
 751 {
 752         struct sys_dev *list, *elem;
 753         char *disk_path;
 754
 755         if ((list = find_intel_devices()) == NULL)
 756                 return 0;
 757
 758         if (!is_fd_valid(fd))
 759                 disk_path  = (char *) devname;
 760         else
 761                 disk_path = diskfd_to_devpath(fd, 1, NULL);
 762
 763         if (!disk_path)
 764                 return 0;
 765
 766         for (elem = list; elem; elem = elem->next)
 767                 if (path_attached_to_hba(disk_path, elem->path))
 768                         break;
 769
 770         if (disk_path != devname)
 771                 free(disk_path);
 772
 773         return elem;
 774 }
 775
 776 static int find_intel_hba_capability(int fd, struct intel_super *super,
 777                                      char *devname);
 778
 779 static struct supertype *match_metadata_desc_imsm(char *arg)
 780 {
 781         struct supertype *st;
 782
 783         if (strcmp(arg, "imsm") != 0 &&
 784             strcmp(arg, "default") != 0
 785                 )
 786                 return NULL;
 787
 788         st = xcalloc(1, sizeof(*st));
 789         st->ss = &super_imsm;
 790         st->max_devs = IMSM_MAX_DEVICES;
 791         st->minor_version = 0;
 792         st->sb = NULL;
 793         return st;
 794 }
 795
 796 static __u8 *get_imsm_version(struct imsm_super *mpb)
 797 {
 798         return &mpb->sig[MPB_SIG_LEN];
 799 }
 800
 801 /* retrieve a disk directly from the anchor when the anchor is known to be
 802  * up-to-date, currently only at load time
 803  */
 804 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
 805 {
 806         if (index >= mpb->num_disks)
 807                 return NULL;
 808         return &mpb->disk[index];
 809 }
 810
 811 /* retrieve the disk description based on a index of the disk
 812  * in the sub-array
 813  */
 814 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
 815 {
 816         struct dl *d;
 817
 818         for (d = super->disks; d; d = d->next)
 819                 if (d->index == index)
 820                         return d;
 821
 822         return NULL;
 823 }
 824 /* retrieve a disk from the parsed metadata */
 825 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
 826 {
 827         struct dl *dl;
 828
 829         dl = get_imsm_dl_disk(super, index);
 830         if (dl)
 831                 return &dl->disk;
 832
 833         return NULL;
 834 }
 835
 836 /* generate a checksum directly from the anchor when the anchor is known to be
 837  * up-to-date, currently only at load or write_super after coalescing
 838  */
 839 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
 840 {
 841         __u32 end = mpb->mpb_size / sizeof(end);
 842         __u32 *p = (__u32 *) mpb;
 843         __u32 sum = 0;
 844
 845         while (end--) {
 846                 sum += __le32_to_cpu(*p);
 847                 p++;
 848         }
 849
 850         return sum - __le32_to_cpu(mpb->check_sum);
 851 }
 852
 853 static size_t sizeof_imsm_map(struct imsm_map *map)
 854 {
 855         return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
 856 }
 857
 858 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
 859 {
 860         /* A device can have 2 maps if it is in the middle of a migration.
 861          * If second_map is:
 862          *    MAP_0 - we return the first map
 863          *    MAP_1 - we return the second map if it exists, else NULL
 864          *    MAP_X - we return the second map if it exists, else the first
 865          */
 866         struct imsm_map *map = &dev->vol.map[0];
 867         struct imsm_map *map2 = NULL;
 868
 869         if (dev->vol.migr_state)
 870                 map2 = (void *)map + sizeof_imsm_map(map);
 871
 872         switch (second_map) {
 873         case MAP_0:
 874                 break;
 875         case MAP_1:
 876                 map = map2;
 877                 break;
 878         case MAP_X:
 879                 if (map2)
 880                         map = map2;
 881                 break;
 882         default:
 883                 map = NULL;
 884         }
 885         return map;
 886
 887 }
 888
 889 /* return the size of the device.
 890  * migr_state increases the returned size if map[0] were to be duplicated
 891  */
 892 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
 893 {
 894         size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
 895                       sizeof_imsm_map(get_imsm_map(dev, MAP_0));
 896
 897         /* migrating means an additional map */
 898         if (dev->vol.migr_state)
 899                 size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
 900         else if (migr_state)
 901                 size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));
 902
 903         return size;
 904 }
 905
 906 /* retrieve disk serial number list from a metadata update */
 907 static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
 908 {
 909         void *u = update;
 910         struct disk_info *inf;
 911
 912         inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
 913               sizeof_imsm_dev(&update->dev, 0);
 914
 915         return inf;
 916 }
 917
 918 /**
 919  * __get_imsm_dev() - Get device with index from imsm_super.
 920  * @mpb: &imsm_super pointer, not NULL.
 921  * @index: Device index.
 922  *
 923  * Function works as non-NULL, aborting in such a case,
 924  * when NULL would be returned.
 925  *
 926  * Device index should be in range 0 up to num_raid_devs.
 927  * Function assumes the index was already verified.
 928  * Index must be valid, otherwise abort() is called.
 929  *
 930  * Return: Pointer to corresponding imsm_dev.
 931  *
 932  */
 933 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
 934 {
 935         int offset;
 936         int i;
 937         void *_mpb = mpb;
 938
 939         if (index >= mpb->num_raid_devs)
 940                 goto error;
 941
 942         /* devices start after all disks */
 943         offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
 944
 945         for (i = 0; i <= index; i++, offset += sizeof_imsm_dev(_mpb + offset, 0))
 946                 if (i == index)
 947                         return _mpb + offset;
 948 error:
 949         pr_err("cannot find imsm_dev with index %u in imsm_super\n", index);
 950         abort();
 951 }
 952
 953 /**
 954  * get_imsm_dev() - Get device with index from intel_super.
 955  * @super: &intel_super pointer, not NULL.
 956  * @index: Device index.
 957  *
 958  * Function works as non-NULL, aborting in such a case,
 959  * when NULL would be returned.
 960  *
 961  * Device index should be in range 0 up to num_raid_devs.
 962  * Function assumes the index was already verified.
 963  * Index must be valid, otherwise abort() is called.
 964  *
 965  * Return: Pointer to corresponding imsm_dev.
 966  *
 967  */
 968 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
 969 {
 970         struct intel_dev *dv;
 971
 972         if (index >= super->anchor->num_raid_devs)
 973                 goto error;
 974
 975         for (dv = super->devlist; dv; dv = dv->next)
 976                 if (dv->index == index)
 977                         return dv->dev;
 978 error:
 979         pr_err("cannot find imsm_dev with index %u in intel_super\n", index);
 980         abort();
 981 }
 982
 983 static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
 984                                                *addr)
 985 {
 986         return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) |
 987                 __le16_to_cpu(addr->w1));
 988 }
 989
 990 static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
 991 {
 992         struct bbm_log_block_addr addr;
 993
 994         addr.w1 =  __cpu_to_le16((__u16)(sec & 0xffff));
 995         addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff);
 996         return addr;
 997 }
 998
 999 /* get size of the bbm log */
1000 static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
1001 {
1002         if (!log || log->entry_count == 0)
1003                 return 0;
1004
1005         return sizeof(log->signature) +
1006                 sizeof(log->entry_count) +
1007                 log->entry_count * sizeof(struct bbm_log_entry);
1008 }
1009
1010 /* check if bad block is not partially stored in bbm log */
1011 static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned
1012                             long long sector, const int length, __u32 *pos)
1013 {
1014         __u32 i;
1015
1016         for (i = *pos; i < log->entry_count; i++) {
1017                 struct bbm_log_entry *entry = &log->marked_block_entries[i];
1018                 unsigned long long bb_start;
1019                 unsigned long long bb_end;
1020
1021                 bb_start = __le48_to_cpu(&entry->defective_block_start);
1022                 bb_end = bb_start + (entry->marked_count + 1);
1023
1024                 if ((entry->disk_ordinal == idx) && (bb_start >= sector) &&
1025                     (bb_end <= sector + length)) {
1026                         *pos = i;
1027                         return 1;
1028                 }
1029         }
1030         return 0;
1031 }
1032
1033 /* record new bad block in bbm log */
1034 static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned
1035                                long long sector, int length)
1036 {
1037         int new_bb = 0;
1038         __u32 pos = 0;
1039         struct bbm_log_entry *entry = NULL;
1040
1041         while (is_stored_in_bbm(log, idx, sector, length, &pos)) {
1042                 struct bbm_log_entry *e = &log->marked_block_entries[pos];
1043
1044                 if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) &&
1045                     (__le48_to_cpu(&e->defective_block_start) == sector)) {
1046                         sector += BBM_LOG_MAX_LBA_ENTRY_VAL;
1047                         length -= BBM_LOG_MAX_LBA_ENTRY_VAL;
1048                         pos = pos + 1;
1049                         continue;
1050                 }
1051                 entry = e;
1052                 break;
1053         }
1054
1055         if (entry) {
1056                 int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
1057                         BBM_LOG_MAX_LBA_ENTRY_VAL;
1058                 entry->defective_block_start = __cpu_to_le48(sector);
1059                 entry->marked_count = cnt - 1;
1060                 if (cnt == length)
1061                         return 1;
1062                 sector += cnt;
1063                 length -= cnt;
1064         }
1065
1066         new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) /
1067                 BBM_LOG_MAX_LBA_ENTRY_VAL;
1068         if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES)
1069                 return 0;
1070
1071         while (length > 0) {
1072                 int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
1073                         BBM_LOG_MAX_LBA_ENTRY_VAL;
1074                 struct bbm_log_entry *entry =
1075                         &log->marked_block_entries[log->entry_count];
1076
1077                 entry->defective_block_start = __cpu_to_le48(sector);
1078                 entry->marked_count = cnt - 1;
1079                 entry->disk_ordinal = idx;
1080
1081                 sector += cnt;
1082                 length -= cnt;
1083
1084                 log->entry_count++;
1085         }
1086
1087         return new_bb;
1088 }
1089
1090 /* clear all bad blocks for given disk */
1091 static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx)
1092 {
1093         __u32 i = 0;
1094
1095         while (i < log->entry_count) {
1096                 struct bbm_log_entry *entries = log->marked_block_entries;
1097
1098                 if (entries[i].disk_ordinal == idx) {
1099                         if (i < log->entry_count - 1)
1100                                 entries[i] = entries[log->entry_count - 1];
1101                         log->entry_count--;
1102                 } else {
1103                         i++;
1104                 }
1105         }
1106 }
1107
1108 /* clear given bad block */
1109 static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
1110                           long long sector, const int length) {
1111         __u32 i = 0;
1112
1113         while (i < log->entry_count) {
1114                 struct bbm_log_entry *entries = log->marked_block_entries;
1115
1116                 if ((entries[i].disk_ordinal == idx) &&
1117                     (__le48_to_cpu(&entries[i].defective_block_start) ==
1118                      sector) && (entries[i].marked_count + 1 == length)) {
1119                         if (i < log->entry_count - 1)
1120                                 entries[i] = entries[log->entry_count - 1];
1121                         log->entry_count--;
1122                         break;
1123                 }
1124                 i++;
1125         }
1126
1127         return 1;
1128 }
1129
1130 /* allocate and load BBM log from metadata */
1131 static int load_bbm_log(struct intel_super *super)
1132 {
1133         struct imsm_super *mpb = super->anchor;
1134         __u32 bbm_log_size =  __le32_to_cpu(mpb->bbm_log_size);
1135
1136         super->bbm_log = xcalloc(1, sizeof(struct bbm_log));
1137         if (!super->bbm_log)
1138                 return 1;
1139
1140         if (bbm_log_size) {
1141                 struct bbm_log *log = (void *)mpb +
1142                         __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1143
1144                 __u32 entry_count;
1145
1146                 if (bbm_log_size < sizeof(log->signature) +
1147                     sizeof(log->entry_count))
1148                         return 2;
1149
1150                 entry_count = __le32_to_cpu(log->entry_count);
1151                 if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) ||
1152                     (entry_count > BBM_LOG_MAX_ENTRIES))
1153                         return 3;
1154
1155                 if (bbm_log_size !=
1156                     sizeof(log->signature) + sizeof(log->entry_count) +
1157                     entry_count * sizeof(struct bbm_log_entry))
1158                         return 4;
1159
1160                 memcpy(super->bbm_log, log, bbm_log_size);
1161         } else {
1162                 super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE);
1163                 super->bbm_log->entry_count = 0;
1164         }
1165
1166         return 0;
1167 }
1168
1169 /* checks if bad block is within volume boundaries */
1170 static int is_bad_block_in_volume(const struct bbm_log_entry *entry,
1171                         const unsigned long long start_sector,
1172                         const unsigned long long size)
1173 {
1174         unsigned long long bb_start;
1175         unsigned long long bb_end;
1176
1177         bb_start = __le48_to_cpu(&entry->defective_block_start);
1178         bb_end = bb_start + (entry->marked_count + 1);
1179
1180         if (((bb_start >= start_sector) && (bb_start < start_sector + size)) ||
1181             ((bb_end >= start_sector) && (bb_end <= start_sector + size)))
1182                 return 1;
1183
1184         return 0;
1185 }
1186
1187 /* get list of bad blocks on a drive for a volume */
1188 static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx,
1189                         const unsigned long long start_sector,
1190                         const unsigned long long size,
1191                         struct md_bb *bbs)
1192 {
1193         __u32 count = 0;
1194         __u32 i;
1195
1196         for (i = 0; i < log->entry_count; i++) {
1197                 const struct bbm_log_entry *ent =
1198                         &log->marked_block_entries[i];
1199                 struct md_bb_entry *bb;
1200
1201                 if ((ent->disk_ordinal == idx) &&
1202                     is_bad_block_in_volume(ent, start_sector, size)) {
1203
1204                         if (!bbs->entries) {
1205                                 bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES *
1206                                                      sizeof(*bb));
1207                                 if (!bbs->entries)
1208                                         break;
1209                         }
1210
1211                         bb = &bbs->entries[count++];
1212                         bb->sector = __le48_to_cpu(&ent->defective_block_start);
1213                         bb->length = ent->marked_count + 1;
1214                 }
1215         }
1216         bbs->count = count;
1217 }
1218
1219 /*
1220  * for second_map:
1221  *  == MAP_0 get first map
1222  *  == MAP_1 get second map
1223  *  == MAP_X than get map according to the current migr_state
1224  */
1225 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
1226                                   int slot,
1227                                   int second_map)
1228 {
1229         struct imsm_map *map;
1230
1231         map = get_imsm_map(dev, second_map);
1232
1233         /* top byte identifies disk under rebuild */
1234         return __le32_to_cpu(map->disk_ord_tbl[slot]);
1235 }
1236
1237 #define ord_to_idx(ord) (((ord) << 8) >> 8)
1238 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
1239 {
1240         __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
1241
1242         return ord_to_idx(ord);
1243 }
1244
1245 static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
1246 {
1247         map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
1248 }
1249
1250 static int get_imsm_disk_slot(struct imsm_map *map, const unsigned int idx)
1251 {
1252         int slot;
1253         __u32 ord;
1254
1255         for (slot = 0; slot < map->num_members; slot++) {
1256                 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
1257                 if (ord_to_idx(ord) == idx)
1258                         return slot;
1259         }
1260
1261         return IMSM_STATUS_ERROR;
1262 }
1263
1264 static int get_imsm_raid_level(struct imsm_map *map)
1265 {
1266         if (map->raid_level == 1) {
1267                 if (map->num_members == 2)
1268                         return 1;
1269                 else
1270                         return 10;
1271         }
1272
1273         return map->raid_level;
1274 }
1275
1276 /**
1277  * get_disk_slot_in_dev() - retrieve disk slot from &imsm_dev.
1278  * @super: &intel_super pointer, not NULL.
1279  * @dev_idx: imsm device index.
1280  * @idx: disk index.
1281  *
1282  * Return: Slot on success, IMSM_STATUS_ERROR otherwise.
1283  */
1284 static int get_disk_slot_in_dev(struct intel_super *super, const __u8 dev_idx,
1285                                 const unsigned int idx)
1286 {
1287         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
1288         struct imsm_map *map = get_imsm_map(dev, MAP_0);
1289
1290         return get_imsm_disk_slot(map, idx);
1291 }
1292
1293 static int cmp_extent(const void *av, const void *bv)
1294 {
1295         const struct extent *a = av;
1296         const struct extent *b = bv;
1297         if (a->start < b->start)
1298                 return -1;
1299         if (a->start > b->start)
1300                 return 1;
1301         return 0;
1302 }
1303
1304 static int count_memberships(struct dl *dl, struct intel_super *super)
1305 {
1306         int memberships = 0;
1307         int i;
1308
1309         for (i = 0; i < super->anchor->num_raid_devs; i++)
1310                 if (get_disk_slot_in_dev(super, i, dl->index) >= 0)
1311                         memberships++;
1312
1313         return memberships;
1314 }
1315
1316 static __u32 imsm_min_reserved_sectors(struct intel_super *super);
1317
1318 static int split_ull(unsigned long long n, void *lo, void *hi)
1319 {
1320         if (lo == 0 || hi == 0)
1321                 return 1;
1322         __put_unaligned32(__cpu_to_le32((__u32)n), lo);
1323         __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
1324         return 0;
1325 }
1326
1327 static unsigned long long join_u32(__u32 lo, __u32 hi)
1328 {
1329         return (unsigned long long)__le32_to_cpu(lo) |
1330                (((unsigned long long)__le32_to_cpu(hi)) << 32);
1331 }
1332
1333 static unsigned long long total_blocks(struct imsm_disk *disk)
1334 {
1335         if (disk == NULL)
1336                 return 0;
1337         return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
1338 }
1339
1340 /**
1341  * imsm_num_data_members() - get data drives count for an array.
1342  * @map: Map to analyze.
1343  *
1344  * num_data_members value represents minimal count of drives for level.
1345  * The name of the property could be misleading for RAID5 with asymmetric layout
1346  * because some data required to be calculated from parity.
1347  * The property is extracted from level and num_members value.
1348  *
1349  * Return: num_data_members value on success, zero otherwise.
1350  */
1351 static __u8 imsm_num_data_members(struct imsm_map *map)
1352 {
1353         switch (get_imsm_raid_level(map)) {
1354         case 0:
1355                 return map->num_members;
1356         case 1:
1357         case 10:
1358                 return map->num_members / 2;
1359         case 5:
1360                 return map->num_members - 1;
1361         default:
1362                 dprintf("unsupported raid level\n");
1363                 return 0;
1364         }
1365 }
1366
1367 static unsigned long long pba_of_lba0(struct imsm_map *map)
1368 {
1369         if (map == NULL)
1370                 return 0;
1371         return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
1372 }
1373
1374 static unsigned long long blocks_per_member(struct imsm_map *map)
1375 {
1376         if (map == NULL)
1377                 return 0;
1378         return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
1379 }
1380
1381 static unsigned long long num_data_stripes(struct imsm_map *map)
1382 {
1383         if (map == NULL)
1384                 return 0;
1385         return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
1386 }
1387
1388 static unsigned long long vol_curr_migr_unit(struct imsm_dev *dev)
1389 {
1390         if (dev == NULL)
1391                 return 0;
1392
1393         return join_u32(dev->vol.curr_migr_unit_lo, dev->vol.curr_migr_unit_hi);
1394 }
1395
1396 static unsigned long long imsm_dev_size(struct imsm_dev *dev)
1397 {
1398         if (dev == NULL)
1399                 return 0;
1400         return join_u32(dev->size_low, dev->size_high);
1401 }
1402
1403 static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec)
1404 {
1405         if (migr_rec == NULL)
1406                 return 0;
1407         return join_u32(migr_rec->ckpt_area_pba_lo,
1408                         migr_rec->ckpt_area_pba_hi);
1409 }
1410
1411 static unsigned long long current_migr_unit(struct migr_record *migr_rec)
1412 {
1413         if (migr_rec == NULL)
1414                 return 0;
1415         return join_u32(migr_rec->curr_migr_unit_lo,
1416                         migr_rec->curr_migr_unit_hi);
1417 }
1418
1419 static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec)
1420 {
1421         if (migr_rec == NULL)
1422                 return 0;
1423         return join_u32(migr_rec->dest_1st_member_lba_lo,
1424                         migr_rec->dest_1st_member_lba_hi);
1425 }
1426
1427 static unsigned long long get_num_migr_units(struct migr_record *migr_rec)
1428 {
1429         if (migr_rec == NULL)
1430                 return 0;
1431         return join_u32(migr_rec->num_migr_units_lo,
1432                         migr_rec->num_migr_units_hi);
1433 }
1434
1435 static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
1436 {
1437         split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
1438 }
1439
1440 /**
1441  * set_num_domains() - Set number of domains for an array.
1442  * @map: Map to be updated.
1443  *
1444  * num_domains property represents copies count of each data drive, thus make
1445  * it meaningful only for RAID1 and RAID10. IMSM supports two domains for
1446  * raid1 and raid10.
1447  */
1448 static void set_num_domains(struct imsm_map *map)
1449 {
1450         int level = get_imsm_raid_level(map);
1451
1452         if (level == 1 || level == 10)
1453                 map->num_domains = 2;
1454         else
1455                 map->num_domains = 1;
1456 }
1457
1458 static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
1459 {
1460         split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
1461 }
1462
1463 static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
1464 {
1465         split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
1466 }
1467
1468 static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
1469 {
1470         split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
1471 }
1472
1473 /**
1474  * update_num_data_stripes() - Calculate and update num_data_stripes value.
1475  * @map: map to be updated.
1476  * @dev_size: size of volume.
1477  *
1478  * num_data_stripes value is addictionally divided by num_domains, therefore for
1479  * levels where num_domains is not 1, nds is a part of real value.
1480  */
1481 static void update_num_data_stripes(struct imsm_map *map,
1482                                      unsigned long long dev_size)
1483 {
1484         unsigned long long nds = dev_size / imsm_num_data_members(map);
1485
1486         nds /= map->num_domains;
1487         nds /= map->blocks_per_strip;
1488         set_num_data_stripes(map, nds);
1489 }
1490
1491 static void set_vol_curr_migr_unit(struct imsm_dev *dev, unsigned long long n)
1492 {
1493         if (dev == NULL)
1494                 return;
1495
1496         split_ull(n, &dev->vol.curr_migr_unit_lo, &dev->vol.curr_migr_unit_hi);
1497 }
1498
1499 static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
1500 {
1501         split_ull(n, &dev->size_low, &dev->size_high);
1502 }
1503
1504 static void set_migr_chkp_area_pba(struct migr_record *migr_rec,
1505                                    unsigned long long n)
1506 {
1507         split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi);
1508 }
1509
1510 static void set_current_migr_unit(struct migr_record *migr_rec,
1511                                   unsigned long long n)
1512 {
1513         split_ull(n, &migr_rec->curr_migr_unit_lo,
1514                   &migr_rec->curr_migr_unit_hi);
1515 }
1516
1517 static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec,
1518                                          unsigned long long n)
1519 {
1520         split_ull(n, &migr_rec->dest_1st_member_lba_lo,
1521                   &migr_rec->dest_1st_member_lba_hi);
1522 }
1523
1524 static void set_num_migr_units(struct migr_record *migr_rec,
1525                                unsigned long long n)
1526 {
1527         split_ull(n, &migr_rec->num_migr_units_lo,
1528                   &migr_rec->num_migr_units_hi);
1529 }
1530
1531 static unsigned long long per_dev_array_size(struct imsm_map *map)
1532 {
1533         unsigned long long array_size = 0;
1534
1535         if (map == NULL)
1536                 return array_size;
1537
1538         array_size = num_data_stripes(map) * map->blocks_per_strip;
1539         if (get_imsm_raid_level(map) == 1 || get_imsm_raid_level(map) == 10)
1540                 array_size *= 2;
1541
1542         return array_size;
1543 }
1544
1545 static struct extent *get_extents(struct intel_super *super, struct dl *dl,
1546                                   int get_minimal_reservation)
1547 {
1548         /* find a list of used extents on the given physical device */
1549         int memberships = count_memberships(dl, super);
1550         struct extent *rv = xcalloc(memberships + 1, sizeof(struct extent));
1551         struct extent *e = rv;
1552         int i;
1553         __u32 reservation;
1554
1555         /* trim the reserved area for spares, so they can join any array
1556          * regardless of whether the OROM has assigned sectors from the
1557          * IMSM_RESERVED_SECTORS region
1558          */
1559         if (dl->index == -1 || get_minimal_reservation)
1560                 reservation = imsm_min_reserved_sectors(super);
1561         else
1562                 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1563
1564         for (i = 0; i < super->anchor->num_raid_devs; i++) {
1565                 struct imsm_dev *dev = get_imsm_dev(super, i);
1566                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1567
1568                 if (get_imsm_disk_slot(map, dl->index) >= 0) {
1569                         e->start = pba_of_lba0(map);
1570                         e->size = per_dev_array_size(map);
1571                         e->vol = i;
1572                         e++;
1573                 }
1574         }
1575         qsort(rv, memberships, sizeof(*rv), cmp_extent);
1576
1577         /* determine the start of the metadata
1578          * when no raid devices are defined use the default
1579          * ...otherwise allow the metadata to truncate the value
1580          * as is the case with older versions of imsm
1581          */
1582         if (memberships) {
1583                 struct extent *last = &rv[memberships - 1];
1584                 unsigned long long remainder;
1585
1586                 remainder = total_blocks(&dl->disk) - (last->start + last->size);
1587                 /* round down to 1k block to satisfy precision of the kernel
1588                  * 'size' interface
1589                  */
1590                 remainder &= ~1UL;
1591                 /* make sure remainder is still sane */
1592                 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
1593                         remainder = ROUND_UP(super->len, 512) >> 9;
1594                 if (reservation > remainder)
1595                         reservation = remainder;
1596         }
1597         e->start = total_blocks(&dl->disk) - reservation;
1598         e->size = 0;
1599         return rv;
1600 }
1601
1602 /* try to determine how much space is reserved for metadata from
1603  * the last get_extents() entry, otherwise fallback to the
1604  * default
1605  */
1606 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
1607 {
1608         struct extent *e;
1609         int i;
1610         __u32 rv;
1611
1612         /* for spares just return a minimal reservation which will grow
1613          * once the spare is picked up by an array
1614          */
1615         if (dl->index == -1)
1616                 return MPB_SECTOR_CNT;
1617
1618         e = get_extents(super, dl, 0);
1619         if (!e)
1620                 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1621
1622         /* scroll to last entry */
1623         for (i = 0; e[i].size; i++)
1624                 continue;
1625
1626         rv = total_blocks(&dl->disk) - e[i].start;
1627
1628         free(e);
1629
1630         return rv;
1631 }
1632
1633 static int is_spare(struct imsm_disk *disk)
1634 {
1635         return (disk->status & SPARE_DISK) == SPARE_DISK;
1636 }
1637
1638 static int is_configured(struct imsm_disk *disk)
1639 {
1640         return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
1641 }
1642
1643 static int is_failed(struct imsm_disk *disk)
1644 {
1645         return (disk->status & FAILED_DISK) == FAILED_DISK;
1646 }
1647
1648 static int is_journal(struct imsm_disk *disk)
1649 {
1650         return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
1651 }
1652
1653 /**
1654  * round_member_size_to_mb()- Round given size to closest MiB.
1655  * @size: size to round in sectors.
1656  */
1657 static inline unsigned long long round_member_size_to_mb(unsigned long long size)
1658 {
1659         return (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
1660 }
1661
/**
 * round_size_to_mb()- Round given size.
 * @array_size: size to round in sectors.
 * @disk_count: count of data members.
 *
 * The share of each data member (@array_size / @disk_count) is rounded with
 * round_member_size_to_mb() and multiplied back by @disk_count, so the
 * result splits evenly between members.
 *
 * Return: Array size, rounded down.
 */
static inline unsigned long long round_size_to_mb(unsigned long long array_size,
                                                  unsigned int disk_count)
{
        unsigned long long per_member = array_size / disk_count;

        return round_member_size_to_mb(per_member) * disk_count;
}
1677
/* tell whether a resync is possible with @missing_disks absent members:
 * level 10 tolerates one missing disk, every other level none
 */
static int able_to_resync(int raid_level, int missing_disks)
{
        const int tolerated = (raid_level == 10) ? 1 : 0;

        return missing_disks <= tolerated;
}
1691
1692 /* try to determine how much space is reserved for metadata from
1693  * the last get_extents() entry on the smallest active disk,
1694  * otherwise fallback to the default
1695  */
1696 static __u32 imsm_min_reserved_sectors(struct intel_super *super)
1697 {
1698         struct extent *e;
1699         int i;
1700         unsigned long long min_active;
1701         __u32 remainder;
1702         __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1703         struct dl *dl, *dl_min = NULL;
1704
1705         if (!super)
1706                 return rv;
1707
1708         min_active = 0;
1709         for (dl = super->disks; dl; dl = dl->next) {
1710                 if (dl->index < 0)
1711                         continue;
1712                 unsigned long long blocks = total_blocks(&dl->disk);
1713                 if (blocks < min_active || min_active == 0) {
1714                         dl_min = dl;
1715                         min_active = blocks;
1716                 }
1717         }
1718         if (!dl_min)
1719                 return rv;
1720
1721         /* find last lba used by subarrays on the smallest active disk */
1722         e = get_extents(super, dl_min, 0);
1723         if (!e)
1724                 return rv;
1725         for (i = 0; e[i].size; i++)
1726                 continue;
1727
1728         remainder = min_active - e[i].start;
1729         free(e);
1730
1731         /* to give priority to recovery we should not require full
1732            IMSM_RESERVED_SECTORS from the spare */
1733         rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
1734
1735         /* if real reservation is smaller use that value */
1736         return  (remainder < rv) ? remainder : rv;
1737 }
1738
1739 static bool is_gen_migration(struct imsm_dev *dev);
1740
1741 #define IMSM_4K_DIV 8
1742
1743 static __u64 blocks_per_migr_unit(struct intel_super *super,
1744                                   struct imsm_dev *dev);
1745
1746 static void print_imsm_dev(struct intel_super *super,
1747                            struct imsm_dev *dev,
1748                            char *uuid,
1749                            int disk_idx)
1750 {
1751         __u64 sz;
1752         int slot, i;
1753         struct imsm_map *map = get_imsm_map(dev, MAP_0);
1754         struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
1755         __u32 ord;
1756
1757         printf("\n");
1758         printf("[%.16s]:\n", dev->volume);
1759         printf("       Subarray : %d\n", super->current_vol);
1760         printf("           UUID : %s\n", uuid);
1761         printf("     RAID Level : %d", get_imsm_raid_level(map));
1762         if (map2)
1763                 printf(" <-- %d", get_imsm_raid_level(map2));
1764         printf("\n");
1765         printf("        Members : %d", map->num_members);
1766         if (map2)
1767                 printf(" <-- %d", map2->num_members);
1768         printf("\n");
1769         printf("          Slots : [");
1770         for (i = 0; i < map->num_members; i++) {
1771                 ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
1772                 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1773         }
1774         printf("]");
1775         if (map2) {
1776                 printf(" <-- [");
1777                 for (i = 0; i < map2->num_members; i++) {
1778                         ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
1779                         printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1780                 }
1781                 printf("]");
1782         }
1783         printf("\n");
1784         printf("    Failed disk : ");
1785         if (map->failed_disk_num == 0xff)
1786                 printf(STR_COMMON_NONE);
1787         else
1788                 printf("%i", map->failed_disk_num);
1789         printf("\n");
1790         slot = get_imsm_disk_slot(map, disk_idx);
1791         if (slot >= 0) {
1792                 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
1793                 printf("      This Slot : %d%s\n", slot,
1794                        ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
1795         } else
1796                 printf("      This Slot : ?\n");
1797         printf("    Sector Size : %u\n", super->sector_size);
1798         sz = imsm_dev_size(dev);
1799         printf("     Array Size : %llu%s\n",
1800                    (unsigned long long)sz * 512 / super->sector_size,
1801                human_size(sz * 512));
1802         sz = blocks_per_member(map);
1803         printf("   Per Dev Size : %llu%s\n",
1804                    (unsigned long long)sz * 512 / super->sector_size,
1805                human_size(sz * 512));
1806         printf("  Sector Offset : %llu\n",
1807                 pba_of_lba0(map) * 512 / super->sector_size);
1808         printf("    Num Stripes : %llu\n",
1809                 num_data_stripes(map));
1810         printf("     Chunk Size : %u KiB",
1811                 __le16_to_cpu(map->blocks_per_strip) / 2);
1812         if (map2)
1813                 printf(" <-- %u KiB",
1814                         __le16_to_cpu(map2->blocks_per_strip) / 2);
1815         printf("\n");
1816         printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
1817         printf("  Migrate State : ");
1818         if (dev->vol.migr_state) {
1819                 if (migr_type(dev) == MIGR_INIT)
1820                         printf("initialize\n");
1821                 else if (migr_type(dev) == MIGR_REBUILD)
1822                         printf("rebuild\n");
1823                 else if (migr_type(dev) == MIGR_VERIFY)
1824                         printf("check\n");
1825                 else if (migr_type(dev) == MIGR_GEN_MIGR)
1826                         printf("general migration\n");
1827                 else if (migr_type(dev) == MIGR_STATE_CHANGE)
1828                         printf("state change\n");
1829                 else if (migr_type(dev) == MIGR_REPAIR)
1830                         printf("repair\n");
1831                 else
1832                         printf("<unknown:%d>\n", migr_type(dev));
1833         } else
1834                 printf("idle\n");
1835         printf("      Map State : %s", map_state_str[map->map_state]);
1836         if (dev->vol.migr_state) {
1837                 struct imsm_map *map = get_imsm_map(dev, MAP_1);
1838
1839                 printf(" <-- %s", map_state_str[map->map_state]);
1840                 printf("\n     Checkpoint : %llu ", vol_curr_migr_unit(dev));
1841                 if (is_gen_migration(dev) && (slot > 1 || slot < 0))
1842                         printf("(N/A)");
1843                 else
1844                         printf("(%llu)", (unsigned long long)
1845                                    blocks_per_migr_unit(super, dev));
1846         }
1847         printf("\n");
1848         printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
1849                                          "dirty" : "clean");
1850         printf("     RWH Policy : ");
1851         if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
1852                 printf("off\n");
1853         else if (dev->rwh_policy == RWH_DISTRIBUTED)
1854                 printf("PPL distributed\n");
1855         else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
1856                 printf("PPL journaling drive\n");
1857         else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
1858                 printf("Multiple distributed PPLs\n");
1859         else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
1860                 printf("Multiple PPLs on journaling drive\n");
1861         else if (dev->rwh_policy == RWH_BITMAP)
1862                 printf("Write-intent bitmap\n");
1863         else
1864                 printf("<unknown:%d>\n", dev->rwh_policy);
1865
1866         printf("      Volume ID : %u\n", dev->my_vol_raid_dev_num);
1867 }
1868
1869 static void print_imsm_disk(struct imsm_disk *disk,
1870                             int index,
1871                             __u32 reserved,
1872                             unsigned int sector_size) {
1873         char str[MAX_RAID_SERIAL_LEN + 1];
1874         __u64 sz;
1875
1876         if (index < -1 || !disk)
1877                 return;
1878
1879         printf("\n");
1880         snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1881         if (index >= 0)
1882                 printf("  Disk%02d Serial : %s\n", index, str);
1883         else
1884                 printf("    Disk Serial : %s\n", str);
1885         printf("          State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
1886                                               is_configured(disk) ? " active" : "",
1887                                               is_failed(disk) ? " failed" : "",
1888                                               is_journal(disk) ? " journal" : "");
1889         printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1890         sz = total_blocks(disk) - reserved;
1891         printf("    Usable Size : %llu%s\n",
1892                (unsigned long long)sz * 512 / sector_size,
1893                human_size(sz * 512));
1894 }
1895
1896 void convert_to_4k_imsm_migr_rec(struct intel_super *super)
1897 {
1898         struct migr_record *migr_rec = super->migr_rec;
1899
1900         migr_rec->blocks_per_unit /= IMSM_4K_DIV;
1901         migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
1902         split_ull((join_u32(migr_rec->post_migr_vol_cap,
1903                  migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
1904                  &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
1905         set_migr_chkp_area_pba(migr_rec,
1906                  migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV);
1907         set_migr_dest_1st_member_lba(migr_rec,
1908                  migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV);
1909 }
1910
1911 void convert_to_4k_imsm_disk(struct imsm_disk *disk)
1912 {
1913         set_total_blocks(disk, (total_blocks(disk)/IMSM_4K_DIV));
1914 }
1915
1916 void convert_to_4k(struct intel_super *super)
1917 {
1918         struct imsm_super *mpb = super->anchor;
1919         struct imsm_disk *disk;
1920         int i;
1921         __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
1922
1923         for (i = 0; i < mpb->num_disks ; i++) {
1924                 disk = __get_imsm_disk(mpb, i);
1925                 /* disk */
1926                 convert_to_4k_imsm_disk(disk);
1927         }
1928         for (i = 0; i < mpb->num_raid_devs; i++) {
1929                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1930                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1931                 /* dev */
1932                 set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV);
1933                 set_vol_curr_migr_unit(dev,
1934                                        vol_curr_migr_unit(dev) / IMSM_4K_DIV);
1935
1936                 /* map0 */
1937                 set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
1938                 map->blocks_per_strip /= IMSM_4K_DIV;
1939                 set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1940
1941                 if (dev->vol.migr_state) {
1942                         /* map1 */
1943                         map = get_imsm_map(dev, MAP_1);
1944                         set_blocks_per_member(map,
1945                             blocks_per_member(map)/IMSM_4K_DIV);
1946                         map->blocks_per_strip /= IMSM_4K_DIV;
1947                         set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1948                 }
1949         }
1950         if (bbm_log_size) {
1951                 struct bbm_log *log = (void *)mpb +
1952                         __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1953                 __u32 i;
1954
1955                 for (i = 0; i < log->entry_count; i++) {
1956                         struct bbm_log_entry *entry =
1957                                 &log->marked_block_entries[i];
1958
1959                         __u8 count = entry->marked_count + 1;
1960                         unsigned long long sector =
1961                                 __le48_to_cpu(&entry->defective_block_start);
1962
1963                         entry->defective_block_start =
1964                                 __cpu_to_le48(sector/IMSM_4K_DIV);
1965                         entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1;
1966                 }
1967         }
1968
1969         mpb->check_sum = __gen_imsm_checksum(mpb);
1970 }
1971
1972 void examine_migr_rec_imsm(struct intel_super *super)
1973 {
1974         struct migr_record *migr_rec = super->migr_rec;
1975         struct imsm_super *mpb = super->anchor;
1976         int i;
1977
1978         for (i = 0; i < mpb->num_raid_devs; i++) {
1979                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1980                 struct imsm_map *map;
1981                 int slot = -1;
1982
1983                 if (is_gen_migration(dev) == false)
1984                                 continue;
1985
1986                 printf("\nMigration Record Information:");
1987
1988                 /* first map under migration */
1989                 map = get_imsm_map(dev, MAP_0);
1990
1991                 if (map)
1992                         slot = get_imsm_disk_slot(map, super->disks->index);
1993                 if (map == NULL || slot > 1 || slot < 0) {
1994                         printf(" Empty\n                              ");
1995                         printf("Examine one of first two disks in array\n");
1996                         break;
1997                 }
1998                 printf("\n                     Status : ");
1999                 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
2000                         printf("Normal\n");
2001                 else
2002                         printf("Contains Data\n");
2003                 printf("               Current Unit : %llu\n",
2004                        current_migr_unit(migr_rec));
2005                 printf("                     Family : %u\n",
2006                        __le32_to_cpu(migr_rec->family_num));
2007                 printf("                  Ascending : %u\n",
2008                        __le32_to_cpu(migr_rec->ascending_migr));
2009                 printf("            Blocks Per Unit : %u\n",
2010                        __le32_to_cpu(migr_rec->blocks_per_unit));
2011                 printf("       Dest. Depth Per Unit : %u\n",
2012                        __le32_to_cpu(migr_rec->dest_depth_per_unit));
2013                 printf("        Checkpoint Area pba : %llu\n",
2014                        migr_chkp_area_pba(migr_rec));
2015                 printf("           First member lba : %llu\n",
2016                        migr_dest_1st_member_lba(migr_rec));
2017                 printf("      Total Number of Units : %llu\n",
2018                        get_num_migr_units(migr_rec));
2019                 printf("             Size of volume : %llu\n",
2020                        join_u32(migr_rec->post_migr_vol_cap,
2021                                 migr_rec->post_migr_vol_cap_hi));
2022                 printf("       Record was read from : %u\n",
2023                        __le32_to_cpu(migr_rec->ckpt_read_disk_num));
2024
2025                 break;
2026         }
2027 }
2028
2029 void convert_from_4k_imsm_migr_rec(struct intel_super *super)
2030 {
2031         struct migr_record *migr_rec = super->migr_rec;
2032
2033         migr_rec->blocks_per_unit *= IMSM_4K_DIV;
2034         migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
2035         split_ull((join_u32(migr_rec->post_migr_vol_cap,
2036                  migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
2037                  &migr_rec->post_migr_vol_cap,
2038                  &migr_rec->post_migr_vol_cap_hi);
2039         set_migr_chkp_area_pba(migr_rec,
2040                  migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV);
2041         set_migr_dest_1st_member_lba(migr_rec,
2042                  migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV);
2043 }
2044
2045 void convert_from_4k(struct intel_super *super)
2046 {
2047         struct imsm_super *mpb = super->anchor;
2048         struct imsm_disk *disk;
2049         int i;
2050         __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
2051
2052         for (i = 0; i < mpb->num_disks ; i++) {
2053                 disk = __get_imsm_disk(mpb, i);
2054                 /* disk */
2055                 set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV));
2056         }
2057
2058         for (i = 0; i < mpb->num_raid_devs; i++) {
2059                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
2060                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2061                 /* dev */
2062                 set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV);
2063                 set_vol_curr_migr_unit(dev,
2064                                        vol_curr_migr_unit(dev) * IMSM_4K_DIV);
2065
2066                 /* map0 */
2067                 set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
2068                 map->blocks_per_strip *= IMSM_4K_DIV;
2069                 set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
2070
2071                 if (dev->vol.migr_state) {
2072                         /* map1 */
2073                         map = get_imsm_map(dev, MAP_1);
2074                         set_blocks_per_member(map,
2075                             blocks_per_member(map)*IMSM_4K_DIV);
2076                         map->blocks_per_strip *= IMSM_4K_DIV;
2077                         set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
2078                 }
2079         }
2080         if (bbm_log_size) {
2081                 struct bbm_log *log = (void *)mpb +
2082                         __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
2083                 __u32 i;
2084
2085                 for (i = 0; i < log->entry_count; i++) {
2086                         struct bbm_log_entry *entry =
2087                                 &log->marked_block_entries[i];
2088
2089                         __u8 count = entry->marked_count + 1;
2090                         unsigned long long sector =
2091                                 __le48_to_cpu(&entry->defective_block_start);
2092
2093                         entry->defective_block_start =
2094                                 __cpu_to_le48(sector*IMSM_4K_DIV);
2095                         entry->marked_count = count*IMSM_4K_DIV - 1;
2096                 }
2097         }
2098
2099         mpb->check_sum = __gen_imsm_checksum(mpb);
2100 }
2101
2102 /*******************************************************************************
2103  * function: imsm_check_attributes
2104  * Description: Function checks if features represented by attributes flags
2105  *              are supported by mdadm.
2106  * Parameters:
2107  *              attributes - Attributes read from metadata
2108  * Returns:
2109  *              0 - passed attributes contains unsupported features flags
2110  *              1 - all features are supported
2111  ******************************************************************************/
2112 static int imsm_check_attributes(__u32 attributes)
2113 {
2114         int ret_val = 1;
2115         __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
2116
2117         not_supported &= ~MPB_ATTRIB_IGNORED;
2118
2119         not_supported &= attributes;
2120         if (not_supported) {
2121                 pr_err("(IMSM): Unsupported attributes : %x\n",
2122                         (unsigned)__le32_to_cpu(not_supported));
2123                 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
2124                         dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
2125                         not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
2126                 }
2127                 if (not_supported & MPB_ATTRIB_2TB) {
2128                         dprintf("\t\tMPB_ATTRIB_2TB\n");
2129                         not_supported ^= MPB_ATTRIB_2TB;
2130                 }
2131                 if (not_supported & MPB_ATTRIB_RAID0) {
2132                         dprintf("\t\tMPB_ATTRIB_RAID0\n");
2133                         not_supported ^= MPB_ATTRIB_RAID0;
2134                 }
2135                 if (not_supported & MPB_ATTRIB_RAID1) {
2136                         dprintf("\t\tMPB_ATTRIB_RAID1\n");
2137                         not_supported ^= MPB_ATTRIB_RAID1;
2138                 }
2139                 if (not_supported & MPB_ATTRIB_RAID10) {
2140                         dprintf("\t\tMPB_ATTRIB_RAID10\n");
2141                         not_supported ^= MPB_ATTRIB_RAID10;
2142                 }
2143                 if (not_supported & MPB_ATTRIB_RAID1E) {
2144                         dprintf("\t\tMPB_ATTRIB_RAID1E\n");
2145                         not_supported ^= MPB_ATTRIB_RAID1E;
2146                 }
2147                 if (not_supported & MPB_ATTRIB_RAID5) {
2148                 dprintf("\t\tMPB_ATTRIB_RAID5\n");
2149                         not_supported ^= MPB_ATTRIB_RAID5;
2150                 }
2151                 if (not_supported & MPB_ATTRIB_RAIDCNG) {
2152                         dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
2153                         not_supported ^= MPB_ATTRIB_RAIDCNG;
2154                 }
2155                 if (not_supported & MPB_ATTRIB_BBM) {
2156                         dprintf("\t\tMPB_ATTRIB_BBM\n");
2157                 not_supported ^= MPB_ATTRIB_BBM;
2158                 }
2159                 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
2160                         dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
2161                         not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
2162                 }
2163                 if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
2164                         dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
2165                         not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
2166                 }
2167                 if (not_supported & MPB_ATTRIB_2TB_DISK) {
2168                         dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
2169                         not_supported ^= MPB_ATTRIB_2TB_DISK;
2170                 }
2171                 if (not_supported & MPB_ATTRIB_NEVER_USE2) {
2172                         dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
2173                         not_supported ^= MPB_ATTRIB_NEVER_USE2;
2174                 }
2175                 if (not_supported & MPB_ATTRIB_NEVER_USE) {
2176                         dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
2177                         not_supported ^= MPB_ATTRIB_NEVER_USE;
2178                 }
2179
2180                 if (not_supported)
2181                         dprintf("(IMSM): Unknown attributes : %x\n", not_supported);
2182
2183                 ret_val = 0;
2184         }
2185
2186         return ret_val;
2187 }
2188
2189 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
2190
2191 static void examine_super_imsm(struct supertype *st, char *homehost)
2192 {
2193         struct intel_super *super = st->sb;
2194         struct imsm_super *mpb = super->anchor;
2195         char str[MAX_SIGNATURE_LENGTH];
2196         int i;
2197         struct mdinfo info;
2198         char nbuf[64];
2199         __u32 sum;
2200         __u32 reserved = imsm_reserved_sectors(super, super->disks);
2201         struct dl *dl;
2202         time_t creation_time;
2203
2204         strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
2205         str[MPB_SIG_LEN-1] = '\0';
2206         printf("          Magic : %s\n", str);
2207         printf("        Version : %s\n", get_imsm_version(mpb));
2208         printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
2209         printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
2210         printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
2211         creation_time = __le64_to_cpu(mpb->creation_time);
2212         printf("  Creation Time : %.24s\n",
2213                 creation_time ? ctime(&creation_time) : "Unknown");
2214         printf("     Attributes : ");
2215         if (imsm_check_attributes(mpb->attributes))
2216                 printf("All supported\n");
2217         else
2218                 printf("not supported\n");
2219         getinfo_super_imsm(st, &info, NULL);
2220         fname_from_uuid(&info, nbuf);
2221         printf("           UUID : %s\n", nbuf + 5);
2222         sum = __le32_to_cpu(mpb->check_sum);
2223         printf("       Checksum : %08x %s\n", sum,
2224                 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
2225         printf("    MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size));
2226         printf("          Disks : %d\n", mpb->num_disks);
2227         printf("   RAID Devices : %d\n", mpb->num_raid_devs);
2228         print_imsm_disk(__get_imsm_disk(mpb, super->disks->index),
2229                         super->disks->index, reserved, super->sector_size);
2230         if (get_imsm_bbm_log_size(super->bbm_log)) {
2231                 struct bbm_log *log = super->bbm_log;
2232
2233                 printf("\n");
2234                 printf("Bad Block Management Log:\n");
2235                 printf("       Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
2236                 printf("      Signature : %x\n", __le32_to_cpu(log->signature));
2237                 printf("    Entry Count : %d\n", __le32_to_cpu(log->entry_count));
2238         }
2239         for (i = 0; i < mpb->num_raid_devs; i++) {
2240                 struct mdinfo info;
2241                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
2242
2243                 super->current_vol = i;
2244                 getinfo_super_imsm(st, &info, NULL);
2245                 fname_from_uuid(&info, nbuf);
2246                 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
2247         }
2248         for (i = 0; i < mpb->num_disks; i++) {
2249                 if (i == super->disks->index)
2250                         continue;
2251                 print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved,
2252                                 super->sector_size);
2253         }
2254
2255         for (dl = super->disks; dl; dl = dl->next)
2256                 if (dl->index == -1)
2257                         print_imsm_disk(&dl->disk, -1, reserved,
2258                                         super->sector_size);
2259
2260         examine_migr_rec_imsm(super);
2261 }
2262
2263 static void brief_examine_super_imsm(struct supertype *st, int verbose)
2264 {
2265         /* We just write a generic IMSM ARRAY entry */
2266         struct mdinfo info;
2267         char nbuf[64];
2268
2269         getinfo_super_imsm(st, &info, NULL);
2270         fname_from_uuid(&info, nbuf);
2271         printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
2272 }
2273
2274 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
2275 {
2276         /* We just write a generic IMSM ARRAY entry */
2277         struct mdinfo info;
2278         char nbuf[64];
2279         char nbuf1[64];
2280         struct intel_super *super = st->sb;
2281         int i;
2282
2283         if (!super->anchor->num_raid_devs)
2284                 return;
2285
2286         getinfo_super_imsm(st, &info, NULL);
2287         fname_from_uuid(&info, nbuf);
2288         for (i = 0; i < super->anchor->num_raid_devs; i++) {
2289                 struct imsm_dev *dev = get_imsm_dev(super, i);
2290
2291                 super->current_vol = i;
2292                 getinfo_super_imsm(st, &info, NULL);
2293                 fname_from_uuid(&info, nbuf1);
2294                 printf("ARRAY " DEV_MD_DIR "%.16s container=%s member=%d UUID=%s\n",
2295                        dev->volume, nbuf + 5, i, nbuf1 + 5);
2296         }
2297 }
2298
2299 static void export_examine_super_imsm(struct supertype *st)
2300 {
2301         struct intel_super *super = st->sb;
2302         struct imsm_super *mpb = super->anchor;
2303         struct mdinfo info;
2304         char nbuf[64];
2305
2306         getinfo_super_imsm(st, &info, NULL);
2307         fname_from_uuid(&info, nbuf);
2308         printf("MD_METADATA=imsm\n");
2309         printf("MD_LEVEL=container\n");
2310         printf("MD_UUID=%s\n", nbuf+5);
2311         printf("MD_DEVICES=%u\n", mpb->num_disks);
2312         printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time));
2313 }
2314
2315 static void detail_super_imsm(struct supertype *st, char *homehost,
2316                               char *subarray)
2317 {
2318         struct mdinfo info;
2319         char nbuf[64];
2320         struct intel_super *super = st->sb;
2321         int temp_vol = super->current_vol;
2322
2323         if (subarray)
2324                 super->current_vol = strtoul(subarray, NULL, 10);
2325
2326         getinfo_super_imsm(st, &info, NULL);
2327         fname_from_uuid(&info, nbuf);
2328         printf("\n              UUID : %s\n", nbuf + 5);
2329
2330         super->current_vol = temp_vol;
2331 }
2332
2333 static void brief_detail_super_imsm(struct supertype *st, char *subarray)
2334 {
2335         struct mdinfo info;
2336         char nbuf[64];
2337         struct intel_super *super = st->sb;
2338         int temp_vol = super->current_vol;
2339
2340         if (subarray)
2341                 super->current_vol = strtoul(subarray, NULL, 10);
2342
2343         getinfo_super_imsm(st, &info, NULL);
2344         fname_from_uuid(&info, nbuf);
2345         printf(" UUID=%s", nbuf + 5);
2346
2347         super->current_vol = temp_vol;
2348 }
2349
2350 static int imsm_read_serial(int fd, char *devname, __u8 *serial,
2351                             size_t serial_buf_len);
2352 static void fd2devname(int fd, char *name);
2353
2354 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
2355 {
2356         /* dump an unsorted list of devices attached to AHCI Intel storage
2357          * controller, as well as non-connected ports
2358          */
2359         int hba_len = strlen(hba_path) + 1;
2360         struct dirent *ent;
2361         DIR *dir;
2362         char *path = NULL;
2363         int err = 0;
2364         unsigned long port_mask = (1 << port_count) - 1;
2365
2366         if (port_count > (int)sizeof(port_mask) * 8) {
2367                 if (verbose > 0)
2368                         pr_err("port_count %d out of range\n", port_count);
2369                 return 2;
2370         }
2371
2372         /* scroll through /sys/dev/block looking for devices attached to
2373          * this hba
2374          */
2375         dir = opendir("/sys/dev/block");
2376         if (!dir)
2377                 return 1;
2378
2379         for (ent = readdir(dir); ent; ent = readdir(dir)) {
2380                 int fd;
2381                 char model[64];
2382                 char vendor[64];
2383                 char buf[1024];
2384                 int major, minor;
2385                 char device[PATH_MAX];
2386                 char *c;
2387                 int port;
2388                 int type;
2389
2390                 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
2391                         continue;
2392                 path = devt_to_devpath(makedev(major, minor), 1, NULL);
2393                 if (!path)
2394                         continue;
2395                 if (!path_attached_to_hba(path, hba_path)) {
2396                         free(path);
2397                         path = NULL;
2398                         continue;
2399                 }
2400
2401                 /* retrieve the scsi device */
2402                 if (!devt_to_devpath(makedev(major, minor), 1, device)) {
2403                         if (verbose > 0)
2404                                 pr_err("failed to get device\n");
2405                         err = 2;
2406                         break;
2407                 }
2408                 if (devpath_to_char(device, "type", buf, sizeof(buf), 0)) {
2409                         err = 2;
2410                         break;
2411                 }
2412                 type = strtoul(buf, NULL, 10);
2413
2414                 /* if it's not a disk print the vendor and model */
2415                 if (!(type == 0 || type == 7 || type == 14)) {
2416                         vendor[0] = '\0';
2417                         model[0] = '\0';
2418
2419                         if (devpath_to_char(device, "vendor", buf,
2420                                             sizeof(buf), 0) == 0) {
2421                                 strncpy(vendor, buf, sizeof(vendor));
2422                                 vendor[sizeof(vendor) - 1] = '\0';
2423                                 c = (char *) &vendor[sizeof(vendor) - 1];
2424                                 while (isspace(*c) || *c == '\0')
2425                                         *c-- = '\0';
2426
2427                         }
2428
2429                         if (devpath_to_char(device, "model", buf,
2430                                             sizeof(buf), 0) == 0) {
2431                                 strncpy(model, buf, sizeof(model));
2432                                 model[sizeof(model) - 1] = '\0';
2433                                 c = (char *) &model[sizeof(model) - 1];
2434                                 while (isspace(*c) || *c == '\0')
2435                                         *c-- = '\0';
2436                         }
2437
2438                         if (vendor[0] && model[0])
2439                                 sprintf(buf, "%.64s %.64s", vendor, model);
2440                         else
2441                                 switch (type) { /* numbers from hald/linux/device.c */
2442                                 case 1: sprintf(buf, "tape"); break;
2443                                 case 2: sprintf(buf, "printer"); break;
2444                                 case 3: sprintf(buf, "processor"); break;
2445                                 case 4:
2446                                 case 5: sprintf(buf, "cdrom"); break;
2447                                 case 6: sprintf(buf, "scanner"); break;
2448                                 case 8: sprintf(buf, "media_changer"); break;
2449                                 case 9: sprintf(buf, "comm"); break;
2450                                 case 12: sprintf(buf, "raid"); break;
2451                                 default: sprintf(buf, "unknown");
2452                                 }
2453                 } else
2454                         buf[0] = '\0';
2455
2456                 /* chop device path to 'host%d' and calculate the port number */
2457                 c = strchr(&path[hba_len], '/');
2458                 if (!c) {
2459                         if (verbose > 0)
2460                                 pr_err("%s - invalid path name\n", path + hba_len);
2461                         err = 2;
2462                         break;
2463                 }
2464                 *c = '\0';
2465                 if ((sscanf(&path[hba_len], "ata%d", &port) == 1) ||
2466                    ((sscanf(&path[hba_len], "host%d", &port) == 1)))
2467                         port -= host_base;
2468                 else {
2469                         if (verbose > 0) {
2470                                 *c = '/'; /* repair the full string */
2471                                 pr_err("failed to determine port number for %s\n",
2472                                         path);
2473                         }
2474                         err = 2;
2475                         break;
2476                 }
2477
2478                 /* mark this port as used */
2479                 port_mask &= ~(1 << port);
2480
2481                 /* print out the device information */
2482                 if (buf[0]) {
2483                         printf("          Port%d : - non-disk device (%s) -\n", port, buf);
2484                         continue;
2485                 }
2486
2487                 fd = dev_open(ent->d_name, O_RDONLY);
2488                 if (!is_fd_valid(fd))
2489                         printf("          Port%d : - disk info unavailable -\n", port);
2490                 else {
2491                         fd2devname(fd, buf);
2492                         printf("          Port%d : %s", port, buf);
2493                         if (imsm_read_serial(fd, NULL, (__u8 *)buf,
2494                                              sizeof(buf)) == 0)
2495                                 printf(" (%s)\n", buf);
2496                         else
2497                                 printf(" ()\n");
2498                         close(fd);
2499                 }
2500                 free(path);
2501                 path = NULL;
2502         }
2503         if (path)
2504                 free(path);
2505         if (dir)
2506                 closedir(dir);
2507         if (err == 0) {
2508                 int i;
2509
2510                 for (i = 0; i < port_count; i++)
2511                         if (port_mask & (1 << i))
2512                                 printf("          Port%d : - no device attached -\n", i);
2513         }
2514
2515         return err;
2516 }
2517
2518 static int print_nvme_info(struct sys_dev *hba)
2519 {
2520         struct dirent *ent;
2521         DIR *dir;
2522
2523         dir = opendir("/sys/block/");
2524         if (!dir)
2525                 return 1;
2526
2527         for (ent = readdir(dir); ent; ent = readdir(dir)) {
2528                 char ns_path[PATH_MAX];
2529                 char cntrl_path[PATH_MAX];
2530                 char buf[PATH_MAX];
2531                 int fd = -1;
2532
2533                 if (!strstr(ent->d_name, "nvme"))
2534                         goto skip;
2535
2536                 fd = open_dev(ent->d_name);
2537                 if (!is_fd_valid(fd))
2538                         goto skip;
2539
2540                 if (!diskfd_to_devpath(fd, 0, ns_path) ||
2541                     !diskfd_to_devpath(fd, 1, cntrl_path))
2542                         goto skip;
2543
2544                 if (!path_attached_to_hba(cntrl_path, hba->path))
2545                         goto skip;
2546
2547                 if (!imsm_is_nvme_namespace_supported(fd, 0))
2548                         goto skip;
2549
2550                 fd2devname(fd, buf);
2551                 if (hba->type == SYS_DEV_VMD)
2552                         printf(" NVMe under VMD : %s", buf);
2553                 else if (hba->type == SYS_DEV_NVME)
2554                         printf("    NVMe Device : %s", buf);
2555
2556                 if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
2557                                       sizeof(buf)))
2558                         printf(" (%s)\n", buf);
2559                 else
2560                         printf("()\n");
2561
2562 skip:
2563                 close_fd(&fd);
2564         }
2565
2566         closedir(dir);
2567         return 0;
2568 }
2569
2570 static void print_found_intel_controllers(struct sys_dev *elem)
2571 {
2572         for (; elem; elem = elem->next) {
2573                 pr_err("found Intel(R) ");
2574                 if (elem->type == SYS_DEV_SATA)
2575                         fprintf(stderr, "SATA ");
2576                 else if (elem->type == SYS_DEV_SAS)
2577                         fprintf(stderr, "SAS ");
2578                 else if (elem->type == SYS_DEV_NVME)
2579                         fprintf(stderr, "NVMe ");
2580
2581                 if (elem->type == SYS_DEV_VMD)
2582                         fprintf(stderr, "VMD domain");
2583                 else if (elem->type == SYS_DEV_SATA_VMD)
2584                         fprintf(stderr, "SATA VMD domain");
2585                 else
2586                         fprintf(stderr, "RAID controller");
2587
2588                 if (elem->pci_id)
2589                         fprintf(stderr, " at %s", elem->pci_id);
2590                 fprintf(stderr, ".\n");
2591         }
2592         fflush(stderr);
2593 }
2594
/*
 * Determine the range of port numbers exposed by an AHCI controller.
 *
 * The directory @hba_path is scanned for entries named "ata<N>" or
 * "host<N>".  On return *@port_count holds the size of the span between
 * the lowest and the highest number found (highest - lowest + 1), or 0
 * when no such entry exists.
 *
 * Returns the lowest number found, or -1 when the directory cannot be
 * opened or holds no matching entry.
 *
 * The lowest and highest numbers are tracked independently because
 * readdir() does not return entries in any particular order; deriving the
 * count incrementally from a moving base under-counts whenever a lower
 * number is seen after a higher one.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
        struct dirent *ent;
        DIR *dir;
        int host_min = -1;
        int host_max = -1;
        int found = 0;

        *port_count = 0;
        dir = opendir(hba_path);
        if (dir == NULL)
                return -1;

        for (ent = readdir(dir); ent; ent = readdir(dir)) {
                int host;

                if ((sscanf(ent->d_name, "ata%d", &host) != 1) &&
                    (sscanf(ent->d_name, "host%d", &host) != 1))
                        continue;

                if (!found) {
                        host_min = host;
                        host_max = host;
                        found = 1;
                } else if (host < host_min) {
                        host_min = host;
                } else if (host > host_max) {
                        host_max = host;
                }
        }
        closedir(dir);

        if (found)
                *port_count = host_max - host_min + 1;

        return host_min;
}
2622
2623 static void print_imsm_capability(const struct imsm_orom *orom)
2624 {
2625         printf("       Platform : Intel(R) ");
2626         if (orom->capabilities == 0 && orom->driver_features == 0)
2627                 printf("Matrix Storage Manager\n");
2628         else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
2629                 printf("Virtual RAID on CPU\n");
2630         else
2631                 printf("Rapid Storage Technology%s\n",
2632                         imsm_orom_is_enterprise(orom) ? " enterprise" : "");
2633         if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) {
2634                 if (imsm_orom_is_vmd_without_efi(orom))
2635                         printf("        Version : %d.%d\n", orom->major_ver,
2636                                orom->minor_ver);
2637                 else
2638                         printf("        Version : %d.%d.%d.%d\n", orom->major_ver,
2639                                orom->minor_ver, orom->hotfix_ver, orom->build);
2640         }
2641         printf("    RAID Levels :%s%s%s%s%s\n",
2642                imsm_orom_has_raid0(orom) ? " raid0" : "",
2643                imsm_orom_has_raid1(orom) ? " raid1" : "",
2644                imsm_orom_has_raid1e(orom) ? " raid1e" : "",
2645                imsm_orom_has_raid10(orom) ? " raid10" : "",
2646                imsm_orom_has_raid5(orom) ? " raid5" : "");
2647         printf("    Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2648                imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
2649                imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
2650                imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
2651                imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
2652                imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
2653                imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
2654                imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
2655                imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
2656                imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
2657                imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
2658                imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
2659                imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
2660                imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
2661                imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
2662                imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
2663                imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
2664         printf("    2TB volumes :%s supported\n",
2665                (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not");
2666         printf("      2TB disks :%s supported\n",
2667                (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not");
2668         printf("      Max Disks : %d\n", orom->tds);
2669         printf("    Max Volumes : %d per array, %d per %s\n",
2670                orom->vpa, orom->vphba,
2671                imsm_orom_is_nvme(orom) ? "platform" : "controller");
2672         return;
2673 }
2674
2675 static void print_imsm_capability_export(const struct imsm_orom *orom)
2676 {
2677         printf("MD_FIRMWARE_TYPE=imsm\n");
2678         if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2679                 printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
2680                                 orom->hotfix_ver, orom->build);
2681         printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
2682                         imsm_orom_has_raid0(orom) ? "raid0 " : "",
2683                         imsm_orom_has_raid1(orom) ? "raid1 " : "",
2684                         imsm_orom_has_raid1e(orom) ? "raid1e " : "",
2685                         imsm_orom_has_raid5(orom) ? "raid10 " : "",
2686                         imsm_orom_has_raid10(orom) ? "raid5 " : "");
2687         printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2688                         imsm_orom_has_chunk(orom, 2) ? "2k " : "",
2689                         imsm_orom_has_chunk(orom, 4) ? "4k " : "",
2690                         imsm_orom_has_chunk(orom, 8) ? "8k " : "",
2691                         imsm_orom_has_chunk(orom, 16) ? "16k " : "",
2692                         imsm_orom_has_chunk(orom, 32) ? "32k " : "",
2693                         imsm_orom_has_chunk(orom, 64) ? "64k " : "",
2694                         imsm_orom_has_chunk(orom, 128) ? "128k " : "",
2695                         imsm_orom_has_chunk(orom, 256) ? "256k " : "",
2696                         imsm_orom_has_chunk(orom, 512) ? "512k " : "",
2697                         imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
2698                         imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
2699                         imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
2700                         imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
2701                         imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
2702                         imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
2703                         imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
2704         printf("IMSM_2TB_VOLUMES=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
2705         printf("IMSM_2TB_DISKS=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
2706         printf("IMSM_MAX_DISKS=%d\n",orom->tds);
2707         printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n",orom->vpa);
2708         printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n",orom->vphba);
2709 }
2710
2711 static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
2712 {
2713         /* There are two components to imsm platform support, the ahci SATA
2714          * controller and the option-rom.  To find the SATA controller we
2715          * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
2716          * controller with the Intel vendor id is present.  This approach
2717          * allows mdadm to leverage the kernel's ahci detection logic, with the
2718          * caveat that if ahci.ko is not loaded mdadm will not be able to
2719          * detect platform raid capabilities.  The option-rom resides in a
2720          * platform "Adapter ROM".  We scan for its signature to retrieve the
2721          * platform capabilities.  If raid support is disabled in the BIOS the
2722          * option-rom capability structure will not be available.
2723          */
2724         struct sys_dev *list, *hba;
2725         int host_base = 0;
2726         int port_count = 0;
2727         int result=1;
2728
2729         if (enumerate_only) {
2730                 if (check_no_platform())
2731                         return 0;
2732                 list = find_intel_devices();
2733                 if (!list)
2734                         return 2;
2735                 for (hba = list; hba; hba = hba->next) {
2736                         if (find_imsm_capability(hba)) {
2737                                 result = 0;
2738                                 break;
2739                         }
2740                         else
2741                                 result = 2;
2742                 }
2743                 return result;
2744         }
2745
2746         list = find_intel_devices();
2747         if (!list) {
2748                 if (verbose > 0)
2749                         pr_err("no active Intel(R) RAID controller found.\n");
2750                 return 2;
2751         } else if (verbose > 0)
2752                 print_found_intel_controllers(list);
2753
2754         for (hba = list; hba; hba = hba->next) {
2755                 if (controller_path && (compare_paths(hba->path, controller_path) != 0))
2756                         continue;
2757                 if (!find_imsm_capability(hba)) {
2758                         char buf[PATH_MAX];
2759                         pr_err("imsm capabilities not found for controller: %s (type %s)\n",
2760                                   hba->type == SYS_DEV_VMD || hba->type == SYS_DEV_SATA_VMD ?
2761                                   vmd_domain_to_controller(hba, buf) :
2762                                   hba->path, get_sys_dev_type(hba->type));
2763                         continue;
2764                 }
2765                 result = 0;
2766         }
2767
2768         if (controller_path && result == 1) {
2769                 pr_err("no active Intel(R) RAID controller found under %s\n",
2770                                 controller_path);
2771                 return result;
2772         }
2773
2774         const struct orom_entry *entry;
2775
2776         for (entry = orom_entries; entry; entry = entry->next) {
2777                 if (entry->type == SYS_DEV_VMD) {
2778                         print_imsm_capability(&entry->orom);
2779                         printf(" 3rd party NVMe :%s supported\n",
2780                             imsm_orom_has_tpv_support(&entry->orom)?"":" not");
2781                         for (hba = list; hba; hba = hba->next) {
2782                                 if (hba->type == SYS_DEV_VMD) {
2783                                         char buf[PATH_MAX];
2784                                         printf(" I/O Controller : %s (%s)\n",
2785                                                 vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
2786                                         if (print_nvme_info(hba)) {
2787                                                 if (verbose > 0)
2788                                                         pr_err("failed to get devices attached to VMD domain.\n");
2789                                                 result |= 2;
2790                                         }
2791                                 }
2792                         }
2793                         printf("\n");
2794                         continue;
2795                 }
2796
2797                 print_imsm_capability(&entry->orom);
2798                 if (entry->type == SYS_DEV_NVME) {
2799                         for (hba = list; hba; hba = hba->next) {
2800                                 if (hba->type == SYS_DEV_NVME)
2801                                         print_nvme_info(hba);
2802                         }
2803                         printf("\n");
2804                         continue;
2805                 }
2806
2807                 struct devid_list *devid;
2808                 for (devid = entry->devid_list; devid; devid = devid->next) {
2809                         hba = device_by_id(devid->devid);
2810                         if (!hba)
2811                                 continue;
2812
2813                         printf(" I/O Controller : %s (%s)\n",
2814                                 hba->path, get_sys_dev_type(hba->type));
2815                         if (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD) {
2816                                 host_base = ahci_get_port_count(hba->path, &port_count);
2817                                 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
2818                                         if (verbose > 0)
2819                                                 pr_err("failed to enumerate ports on %s controller at %s.\n",
2820                                                         get_sys_dev_type(hba->type), hba->pci_id);
2821                                         result |= 2;
2822                                 }
2823                         }
2824                 }
2825                 printf("\n");
2826         }
2827
2828         return result;
2829 }
2830
2831 static int export_detail_platform_imsm(int verbose, char *controller_path)
2832 {
2833         struct sys_dev *list, *hba;
2834         int result=1;
2835
2836         list = find_intel_devices();
2837         if (!list) {
2838                 if (verbose > 0)
2839                         pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
2840                 result = 2;
2841                 return result;
2842         }
2843
2844         for (hba = list; hba; hba = hba->next) {
2845                 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
2846                         continue;
2847                 if (!find_imsm_capability(hba) && verbose > 0) {
2848                         char buf[PATH_MAX];
2849                         pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",
2850                                 hba->type == SYS_DEV_VMD || hba->type == SYS_DEV_SATA_VMD ?
2851                                 vmd_domain_to_controller(hba, buf) : hba->path);
2852                 }
2853                 else
2854                         result = 0;
2855         }
2856
2857         const struct orom_entry *entry;
2858
2859         for (entry = orom_entries; entry; entry = entry->next) {
2860                 if (entry->type == SYS_DEV_VMD || entry->type == SYS_DEV_SATA_VMD) {
2861                         for (hba = list; hba; hba = hba->next)
2862                                 print_imsm_capability_export(&entry->orom);
2863                         continue;
2864                 }
2865                 print_imsm_capability_export(&entry->orom);
2866         }
2867
2868         return result;
2869 }
2870
/*
 * The imsm metadata format carries no host identification, so it can
 * neither be confirmed nor denied that an array is "meant" for this host:
 * the answer is always -1.
 *
 * Member disks that do not belong are excluded by compare_super and the
 * 'family_num' fields, and mdadm.conf specifies the arrays that should be
 * assembled.  Auto-assembly may still pick up "foreign" arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
        /* neither argument can influence the answer */
        (void)st;
        (void)homehost;

        return -1;
}
2884
2885 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
2886 {
2887         /* The uuid returned here is used for:
2888          *  uuid to put into bitmap file (Create, Grow)
2889          *  uuid for backup header when saving critical section (Grow)
2890          *  comparing uuids when re-adding a device into an array
2891          *    In these cases the uuid required is that of the data-array,
2892          *    not the device-set.
2893          *  uuid to recognise same set when adding a missing device back
2894          *    to an array.   This is a uuid for the device-set.
2895          *
2896          * For each of these we can make do with a truncated
2897          * or hashed uuid rather than the original, as long as
2898          * everyone agrees.
2899          * In each case the uuid required is that of the data-array,
2900          * not the device-set.
2901          */
2902         /* imsm does not track uuid's so we synthesis one using sha1 on
2903          * - The signature (Which is constant for all imsm array, but no matter)
2904          * - the orig_family_num of the container
2905          * - the index number of the volume
2906          * - the 'serial' number of the volume.
2907          * Hopefully these are all constant.
2908          */
2909         struct intel_super *super = st->sb;
2910
2911         char buf[20];
2912         struct sha1_ctx ctx;
2913         struct imsm_dev *dev = NULL;
2914         __u32 family_num;
2915
2916         /* some mdadm versions failed to set ->orig_family_num, in which
2917          * case fall back to ->family_num.  orig_family_num will be
2918          * fixed up with the first metadata update.
2919          */
2920         family_num = super->anchor->orig_family_num;
2921         if (family_num == 0)
2922                 family_num = super->anchor->family_num;
2923         sha1_init_ctx(&ctx);
2924         sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
2925         sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
2926         if (super->current_vol >= 0)
2927                 dev = get_imsm_dev(super, super->current_vol);
2928         if (dev) {
2929                 __u32 vol = super->current_vol;
2930                 sha1_process_bytes(&vol, sizeof(vol), &ctx);
2931                 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
2932         }
2933         sha1_finish_ctx(&ctx, buf);
2934         memcpy(uuid, buf, 4*4);
2935 }
2936
2937 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
2938 {
2939         /* migr_strip_size when repairing or initializing parity */
2940         struct imsm_map *map = get_imsm_map(dev, MAP_0);
2941         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2942
2943         switch (get_imsm_raid_level(map)) {
2944         case 5:
2945         case 10:
2946                 return chunk;
2947         default:
2948                 return 128*1024 >> 9;
2949         }
2950 }
2951
2952 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
2953 {
2954         /* migr_strip_size when rebuilding a degraded disk, no idea why
2955          * this is different than migr_strip_size_resync(), but it's good
2956          * to be compatible
2957          */
2958         struct imsm_map *map = get_imsm_map(dev, MAP_1);
2959         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2960
2961         switch (get_imsm_raid_level(map)) {
2962         case 1:
2963         case 10:
2964                 if (map->num_members % map->num_domains == 0)
2965                         return 128*1024 >> 9;
2966                 else
2967                         return chunk;
2968         case 5:
2969                 return max((__u32) 64*1024 >> 9, chunk);
2970         default:
2971                 return 128*1024 >> 9;
2972         }
2973 }
2974
2975 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
2976 {
2977         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2978         struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2979         __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
2980         __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
2981
2982         return max((__u32) 1, hi_chunk / lo_chunk);
2983 }
2984
2985 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
2986 {
2987         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2988         int level = get_imsm_raid_level(lo);
2989
2990         if (level == 1 || level == 10) {
2991                 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2992
2993                 return hi->num_domains;
2994         } else
2995                 return num_stripes_per_unit_resync(dev);
2996 }
2997
2998 static unsigned long long calc_component_size(struct imsm_map *map,
2999                                               struct imsm_dev *dev)
3000 {
3001         unsigned long long component_size;
3002         unsigned long long dev_size = imsm_dev_size(dev);
3003         long long calc_dev_size = 0;
3004         unsigned int member_disks = imsm_num_data_members(map);
3005
3006         if (member_disks == 0)
3007                 return 0;
3008
3009         component_size = per_dev_array_size(map);
3010         calc_dev_size = component_size * member_disks;
3011
3012         /* Component size is rounded to 1MB so difference between size from
3013          * metadata and size calculated from num_data_stripes equals up to
3014          * 2048 blocks per each device. If the difference is higher it means
3015          * that array size was expanded and num_data_stripes was not updated.
3016          */
3017         if (llabs(calc_dev_size - (long long)dev_size) >
3018             (1 << SECT_PER_MB_SHIFT) * member_disks) {
3019                 component_size = dev_size / member_disks;
3020                 dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
3021                         component_size / map->blocks_per_strip,
3022                         num_data_stripes(map));
3023         }
3024
3025         return component_size;
3026 }
3027
3028 static __u32 parity_segment_depth(struct imsm_dev *dev)
3029 {
3030         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3031         __u32 chunk =  __le32_to_cpu(map->blocks_per_strip);
3032
3033         switch(get_imsm_raid_level(map)) {
3034         case 1:
3035         case 10:
3036                 return chunk * map->num_domains;
3037         case 5:
3038                 return chunk * map->num_members;
3039         default:
3040                 return chunk;
3041         }
3042 }
3043
3044 static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
3045 {
3046         struct imsm_map *map = get_imsm_map(dev, MAP_1);
3047         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
3048         __u32 strip = block / chunk;
3049
3050         switch (get_imsm_raid_level(map)) {
3051         case 1:
3052         case 10: {
3053                 __u32 vol_strip = (strip * map->num_domains) + 1;
3054                 __u32 vol_stripe = vol_strip / map->num_members;
3055
3056                 return vol_stripe * chunk + block % chunk;
3057         } case 5: {
3058                 __u32 stripe = strip / (map->num_members - 1);
3059
3060                 return stripe * chunk + block % chunk;
3061         }
3062         default:
3063                 return 0;
3064         }
3065 }
3066
3067 static __u64 blocks_per_migr_unit(struct intel_super *super,
3068                                   struct imsm_dev *dev)
3069 {
3070         /* calculate the conversion factor between per member 'blocks'
3071          * (md/{resync,rebuild}_start) and imsm migration units, return
3072          * 0 for the 'not migrating' and 'unsupported migration' cases
3073          */
3074         if (!dev->vol.migr_state)
3075                 return 0;
3076
3077         switch (migr_type(dev)) {
3078         case MIGR_GEN_MIGR: {
3079                 struct migr_record *migr_rec = super->migr_rec;
3080                 return __le32_to_cpu(migr_rec->blocks_per_unit);
3081         }
3082         case MIGR_VERIFY:
3083         case MIGR_REPAIR:
3084         case MIGR_INIT: {
3085                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
3086                 __u32 stripes_per_unit;
3087                 __u32 blocks_per_unit;
3088                 __u32 parity_depth;
3089                 __u32 migr_chunk;
3090                 __u32 block_map;
3091                 __u32 block_rel;
3092                 __u32 segment;
3093                 __u32 stripe;
3094                 __u8  disks;
3095
3096                 /* yes, this is really the translation of migr_units to
3097                  * per-member blocks in the 'resync' case
3098                  */
3099                 stripes_per_unit = num_stripes_per_unit_resync(dev);
3100                 migr_chunk = migr_strip_blocks_resync(dev);
3101                 disks = imsm_num_data_members(map);
3102                 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
3103                 stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
3104                 segment = blocks_per_unit / stripe;
3105                 block_rel = blocks_per_unit - segment * stripe;
3106                 parity_depth = parity_segment_depth(dev);
3107                 block_map = map_migr_block(dev, block_rel);
3108                 return block_map + parity_depth * segment;
3109         }
3110         case MIGR_REBUILD: {
3111                 __u32 stripes_per_unit;
3112                 __u32 migr_chunk;
3113
3114                 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
3115                 migr_chunk = migr_strip_blocks_rebuild(dev);
3116                 return migr_chunk * stripes_per_unit;
3117         }
3118         case MIGR_STATE_CHANGE:
3119         default:
3120                 return 0;
3121         }
3122 }
3123
3124 static int imsm_level_to_layout(int level)
3125 {
3126         switch (level) {
3127         case 0:
3128         case 1:
3129                 return 0;
3130         case 5:
3131         case 6:
3132                 return ALGORITHM_LEFT_ASYMMETRIC;
3133         case 10:
3134                 return 0x102;
3135         }
3136         return UnSet;
3137 }
3138
3139 /*******************************************************************************
3140  * Function:    read_imsm_migr_rec
3141  * Description: Function reads imsm migration record from last sector of disk
3142  * Parameters:
3143  *      fd      : disk descriptor
3144  *      super   : metadata info
3145  * Returns:
3146  *       0 : success,
3147  *      -1 : fail
3148  ******************************************************************************/
3149 static int read_imsm_migr_rec(int fd, struct intel_super *super)
3150 {
3151         int ret_val = -1;
3152         unsigned int sector_size = super->sector_size;
3153         unsigned long long dsize;
3154
3155         get_dev_size(fd, NULL, &dsize);
3156         if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION),
3157                    SEEK_SET) < 0) {
3158                 pr_err("Cannot seek to anchor block: %s\n",
3159                        strerror(errno));
3160                 goto out;
3161         }
3162         if ((unsigned int)read(fd, super->migr_rec_buf,
3163             MIGR_REC_BUF_SECTORS*sector_size) !=
3164             MIGR_REC_BUF_SECTORS*sector_size) {
3165                 pr_err("Cannot read migr record block: %s\n",
3166                        strerror(errno));
3167                 goto out;
3168         }
3169         ret_val = 0;
3170         if (sector_size == 4096)
3171                 convert_from_4k_imsm_migr_rec(super);
3172
3173 out:
3174         return ret_val;
3175 }
3176
3177 static struct imsm_dev *imsm_get_device_during_migration(
3178         struct intel_super *super)
3179 {
3180
3181         struct intel_dev *dv;
3182
3183         for (dv = super->devlist; dv; dv = dv->next) {
3184                 if (is_gen_migration(dv->dev))
3185                         return dv->dev;
3186         }
3187         return NULL;
3188 }
3189
3190 /*******************************************************************************
3191  * Function:    load_imsm_migr_rec
3192  * Description: Function reads imsm migration record (it is stored at the last
3193  *              sector of disk)
3194  * Parameters:
3195  *      super   : imsm internal array info
3196  * Returns:
3197  *       0 : success
3198  *      -1 : fail
3199  *      -2 : no migration in progress
3200  ******************************************************************************/
3201 static int load_imsm_migr_rec(struct intel_super *super)
3202 {
3203         struct dl *dl;
3204         char nm[30];
3205         int retval = -1;
3206         int fd = -1;
3207         struct imsm_dev *dev;
3208         struct imsm_map *map;
3209         int slot = -1;
3210         int keep_fd = 1;
3211
3212         /* find map under migration */
3213         dev = imsm_get_device_during_migration(super);
3214         /* nothing to load,no migration in progress?
3215         */
3216         if (dev == NULL)
3217                 return -2;
3218
3219         map = get_imsm_map(dev, MAP_0);
3220         if (!map)
3221                 return -1;
3222
3223         for (dl = super->disks; dl; dl = dl->next) {
3224                 /* skip spare and failed disks
3225                  */
3226                 if (dl->index < 0)
3227                         continue;
3228                 /* read only from one of the first two slots
3229                  */
3230                 slot = get_imsm_disk_slot(map, dl->index);
3231                 if (slot > 1 || slot < 0)
3232                         continue;
3233
3234                 if (!is_fd_valid(dl->fd)) {
3235                         sprintf(nm, "%d:%d", dl->major, dl->minor);
3236                         fd = dev_open(nm, O_RDONLY);
3237
3238                         if (is_fd_valid(fd)) {
3239                                 keep_fd = 0;
3240                                 break;
3241                         }
3242                 } else {
3243                         fd = dl->fd;
3244                         break;
3245                 }
3246         }
3247
3248         if (!is_fd_valid(fd))
3249                 return retval;
3250         retval = read_imsm_migr_rec(fd, super);
3251         if (!keep_fd)
3252                 close(fd);
3253
3254         return retval;
3255 }
3256
3257 /*******************************************************************************
3258  * function: imsm_create_metadata_checkpoint_update
3259  * Description: It creates update for checkpoint change.
3260  * Parameters:
3261  *      super   : imsm internal array info
3262  *      u       : pointer to prepared update
3263  * Returns:
3264  *      Uptate length.
3265  *      If length is equal to 0, input pointer u contains no update
3266  ******************************************************************************/
3267 static int imsm_create_metadata_checkpoint_update(
3268         struct intel_super *super,
3269         struct imsm_update_general_migration_checkpoint **u)
3270 {
3271
3272         int update_memory_size = 0;
3273
3274         dprintf("(enter)\n");
3275
3276         if (u == NULL)
3277                 return 0;
3278         *u = NULL;
3279
3280         /* size of all update data without anchor */
3281         update_memory_size =
3282                 sizeof(struct imsm_update_general_migration_checkpoint);
3283
3284         *u = xcalloc(1, update_memory_size);
3285         if (*u == NULL) {
3286                 dprintf("error: cannot get memory\n");
3287                 return 0;
3288         }
3289         (*u)->type = update_general_migration_checkpoint;
3290         (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
3291         dprintf("prepared for %llu\n", (unsigned long long)(*u)->curr_migr_unit);
3292
3293         return update_memory_size;
3294 }
3295
3296 static void imsm_update_metadata_locally(struct supertype *st,
3297                                          void *buf, int len);
3298
3299 /*******************************************************************************
3300  * Function:    write_imsm_migr_rec
3301  * Description: Function writes imsm migration record
3302  *              (at the last sector of disk)
3303  * Parameters:
3304  *      super   : imsm internal array info
3305  * Returns:
3306  *       0 : success
3307  *      -1 : if fail
3308  ******************************************************************************/
3309 static int write_imsm_migr_rec(struct supertype *st)
3310 {
3311         struct intel_super *super = st->sb;
3312         unsigned int sector_size = super->sector_size;
3313         unsigned long long dsize;
3314         int retval = -1;
3315         struct dl *sd;
3316         int len;
3317         struct imsm_update_general_migration_checkpoint *u;
3318         struct imsm_dev *dev;
3319         struct imsm_map *map;
3320
3321         /* find map under migration */
3322         dev = imsm_get_device_during_migration(super);
3323         /* if no migration, write buffer anyway to clear migr_record
3324          * on disk based on first available device
3325         */
3326         if (dev == NULL)
3327                 dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
3328                                           super->current_vol);
3329
3330         map = get_imsm_map(dev, MAP_0);
3331
3332         if (sector_size == 4096)
3333                 convert_to_4k_imsm_migr_rec(super);
3334         for (sd = super->disks ; sd ; sd = sd->next) {
3335                 int slot = -1;
3336
3337                 /* skip failed and spare devices */
3338                 if (sd->index < 0)
3339                         continue;
3340                 /* write to 2 first slots only */
3341                 if (map)
3342                         slot = get_imsm_disk_slot(map, sd->index);
3343                 if (map == NULL || slot > 1 || slot < 0)
3344                         continue;
3345
3346                 get_dev_size(sd->fd, NULL, &dsize);
3347                 if (lseek64(sd->fd, dsize - (MIGR_REC_SECTOR_POSITION *
3348                     sector_size),
3349                     SEEK_SET) < 0) {
3350                         pr_err("Cannot seek to anchor block: %s\n",
3351                                strerror(errno));
3352                         goto out;
3353                 }
3354                 if ((unsigned int)write(sd->fd, super->migr_rec_buf,
3355                     MIGR_REC_BUF_SECTORS*sector_size) !=
3356                     MIGR_REC_BUF_SECTORS*sector_size) {
3357                         pr_err("Cannot write migr record block: %s\n",
3358                                strerror(errno));
3359                         goto out;
3360                 }
3361         }
3362         if (sector_size == 4096)
3363                 convert_from_4k_imsm_migr_rec(super);
3364         /* update checkpoint information in metadata */
3365         len = imsm_create_metadata_checkpoint_update(super, &u);
3366         if (len <= 0) {
3367                 dprintf("imsm: Cannot prepare update\n");
3368                 goto out;
3369         }
3370         /* update metadata locally */
3371         imsm_update_metadata_locally(st, u, len);
3372         /* and possibly remotely */
3373         if (st->update_tail) {
3374                 append_metadata_update(st, u, len);
3375                 /* during reshape we do all work inside metadata handler
3376                  * manage_reshape(), so metadata update has to be triggered
3377                  * insida it
3378                  */
3379                 flush_metadata_updates(st);
3380                 st->update_tail = &st->updates;
3381         } else
3382                 free(u);
3383
3384         retval = 0;
3385  out:
3386         return retval;
3387 }
3388
3389 /* spare/missing disks activations are not allowe when
3390  * array/container performs reshape operation, because
3391  * all arrays in container works on the same disks set
3392  */
3393 int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
3394 {
3395         int rv = 0;
3396         struct intel_dev *i_dev;
3397         struct imsm_dev *dev;
3398
3399         /* check whole container
3400          */
3401         for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
3402                 dev = i_dev->dev;
3403                 if (is_gen_migration(dev)) {
3404                         /* No repair during any migration in container
3405                          */
3406                         rv = 1;
3407                         break;
3408                 }
3409         }
3410         return rv;
3411 }
3412 static unsigned long long imsm_component_size_alignment_check(int level,
3413                                               int chunk_size,
3414                                               unsigned int sector_size,
3415                                               unsigned long long component_size)
3416 {
3417         unsigned int component_size_alignment;
3418
3419         /* check component size alignment
3420         */
3421         component_size_alignment = component_size % (chunk_size/sector_size);
3422
3423         dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alignment = %u\n",
3424                 level, chunk_size, component_size,
3425                 component_size_alignment);
3426
3427         if (component_size_alignment && (level != 1) && (level != UnSet)) {
3428                 dprintf("imsm: reported component size aligned from %llu ",
3429                         component_size);
3430                 component_size -= component_size_alignment;
3431                 dprintf_cont("to %llu (%i).\n",
3432                         component_size, component_size_alignment);
3433         }
3434
3435         return component_size;
3436 }
3437
3438 /*******************************************************************************
3439  * Function:    get_bitmap_header_sector
3440  * Description: Returns the sector where the bitmap header is placed.
3441  * Parameters:
3442  *      st              : supertype information
3443  *      dev_idx         : index of the device with bitmap
3444  *
3445  * Returns:
3446  *       The sector where the bitmap header is placed
3447  ******************************************************************************/
3448 static unsigned long long get_bitmap_header_sector(struct intel_super *super,
3449                                                    int dev_idx)
3450 {
3451         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
3452         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3453
3454         if (!super->sector_size) {
3455                 dprintf("sector size is not set\n");
3456                 return 0;
3457         }
3458
3459         return pba_of_lba0(map) + calc_component_size(map, dev) +
3460                (IMSM_BITMAP_HEADER_OFFSET / super->sector_size);
3461 }
3462
3463 /*******************************************************************************
3464  * Function:    get_bitmap_sector
3465  * Description: Returns the sector where the bitmap is placed.
3466  * Parameters:
3467  *      st              : supertype information
3468  *      dev_idx         : index of the device with bitmap
3469  *
3470  * Returns:
3471  *       The sector where the bitmap is placed
3472  ******************************************************************************/
3473 static unsigned long long get_bitmap_sector(struct intel_super *super,
3474                                             int dev_idx)
3475 {
3476         if (!super->sector_size) {
3477                 dprintf("sector size is not set\n");
3478                 return 0;
3479         }
3480
3481         return get_bitmap_header_sector(super, dev_idx) +
3482                (IMSM_BITMAP_HEADER_SIZE / super->sector_size);
3483 }
3484
3485 static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
3486 {
3487         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
3488         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3489
3490         return pba_of_lba0(map) +
3491                (num_data_stripes(map) * map->blocks_per_strip);
3492 }
3493
3494 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
3495 {
3496         struct intel_super *super = st->sb;
3497         struct migr_record *migr_rec = super->migr_rec;
3498         struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
3499         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3500         struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
3501         struct imsm_map *map_to_analyse = map;
3502         struct dl *dl;
3503         int map_disks = info->array.raid_disks;
3504
3505         memset(info, 0, sizeof(*info));
3506         if (prev_map)
3507                 map_to_analyse = prev_map;
3508
3509         dl = super->current_disk;
3510
3511         info->container_member    = super->current_vol;
3512         info->array.raid_disks    = map->num_members;
3513         info->array.level         = get_imsm_raid_level(map_to_analyse);
3514         info->array.layout        = imsm_level_to_layout(info->array.level);
3515         info->array.md_minor      = -1;
3516         info->array.ctime         = 0;
3517         info->array.utime         = 0;
3518         info->array.chunk_size    =
3519                 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
3520         info->array.state         = !(dev->vol.dirty & RAIDVOL_DIRTY);
3521         info->custom_array_size   = imsm_dev_size(dev);
3522         info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3523
3524         if (is_gen_migration(dev)) {
3525                 /*
3526                  * device prev_map should be added if it is in the middle
3527                  * of migration
3528                  */
3529                 assert(prev_map);
3530
3531                 info->reshape_active = 1;
3532                 info->new_level = get_imsm_raid_level(map);
3533                 info->new_layout = imsm_level_to_layout(info->new_level);
3534                 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3535                 info->delta_disks = map->num_members - prev_map->num_members;
3536                 if (info->delta_disks) {
3537                         /* this needs to be applied to every array
3538                          * in the container.
3539                          */
3540                         info->reshape_active = CONTAINER_RESHAPE;
3541                 }
3542                 /* We shape information that we give to md might have to be
3543                  * modify to cope with md's requirement for reshaping arrays.
3544                  * For example, when reshaping a RAID0, md requires it to be
3545                  * presented as a degraded RAID4.
3546                  * Also if a RAID0 is migrating to a RAID5 we need to specify
3547                  * the array as already being RAID5, but the 'before' layout
3548                  * is a RAID4-like layout.
3549                  */
3550                 switch (info->array.level) {
3551                 case 0:
3552                         switch(info->new_level) {
3553                         case 0:
3554                                 /* conversion is happening as RAID4 */
3555                                 info->array.level = 4;
3556                                 info->array.raid_disks += 1;
3557                                 break;
3558                         case 5:
3559                                 /* conversion is happening as RAID5 */
3560                                 info->array.level = 5;
3561                                 info->array.layout = ALGORITHM_PARITY_N;
3562                                 info->delta_disks -= 1;
3563                                 break;
3564                         default:
3565                                 /* FIXME error message */
3566                                 info->array.level = UnSet;
3567                                 break;
3568                         }
3569                         break;
3570                 }
3571         } else {
3572                 info->new_level = UnSet;
3573                 info->new_layout = UnSet;
3574                 info->new_chunk = info->array.chunk_size;
3575                 info->delta_disks = 0;
3576         }
3577
3578         if (dl) {
3579                 info->disk.major = dl->major;
3580                 info->disk.minor = dl->minor;
3581                 info->disk.number = dl->index;
3582                 info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
3583                                                           dl->index);
3584         }
3585
3586         info->data_offset         = pba_of_lba0(map_to_analyse);
3587         info->component_size = calc_component_size(map, dev);
3588         info->component_size = imsm_component_size_alignment_check(
3589                                                         info->array.level,
3590                                                         info->array.chunk_size,
3591                                                         super->sector_size,
3592                                                         info->component_size);
3593         info->bb.supported = 1;
3594
3595         memset(info->uuid, 0, sizeof(info->uuid));
3596         info->recovery_start = MaxSector;
3597
3598         if (info->array.level == 5 &&
3599             (dev->rwh_policy == RWH_DISTRIBUTED ||
3600              dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
3601                 info->consistency_policy = CONSISTENCY_POLICY_PPL;
3602                 info->ppl_sector = get_ppl_sector(super, super->current_vol);
3603                 if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
3604                         info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
3605                 else
3606                         info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
3607                                           >> 9;
3608         } else if (info->array.level <= 0) {
3609                 info->consistency_policy = CONSISTENCY_POLICY_NONE;
3610         } else {
3611                 if (dev->rwh_policy == RWH_BITMAP) {
3612                         info->bitmap_offset = get_bitmap_sector(super, super->current_vol);
3613                         info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
3614                 } else {
3615                         info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
3616                 }
3617         }
3618
3619         info->reshape_progress = 0;
3620         info->resync_start = MaxSector;
3621         if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
3622             !(info->array.state & 1)) &&
3623             imsm_reshape_blocks_arrays_changes(super) == 0) {
3624                 info->resync_start = 0;
3625         }
3626         if (dev->vol.migr_state) {
3627                 switch (migr_type(dev)) {
3628                 case MIGR_REPAIR:
3629                 case MIGR_INIT: {
3630                         __u64 blocks_per_unit = blocks_per_migr_unit(super,
3631                                                                      dev);
3632                         __u64 units = vol_curr_migr_unit(dev);
3633
3634                         info->resync_start = blocks_per_unit * units;
3635                         break;
3636                 }
3637                 case MIGR_GEN_MIGR: {
3638                         __u64 blocks_per_unit = blocks_per_migr_unit(super,
3639                                                                      dev);
3640                         __u64 units = current_migr_unit(migr_rec);
3641                         int used_disks;
3642
3643                         if (__le32_to_cpu(migr_rec->ascending_migr) &&
3644                             (units <
3645                                 (get_num_migr_units(migr_rec)-1)) &&
3646                             (super->migr_rec->rec_status ==
3647                                         __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
3648                                 units++;
3649
3650                         info->reshape_progress = blocks_per_unit * units;
3651
3652                         dprintf("IMSM: General Migration checkpoint : %llu (%llu) -> read reshape progress : %llu\n",
3653                                 (unsigned long long)units,
3654                                 (unsigned long long)blocks_per_unit,
3655                                 info->reshape_progress);
3656
3657                         used_disks = imsm_num_data_members(prev_map);
3658                         if (used_disks > 0) {
3659                                 info->custom_array_size = per_dev_array_size(map) *
3660                                         used_disks;
3661                         }
3662                 }
3663                 case MIGR_VERIFY:
3664                         /* we could emulate the checkpointing of
3665                          * 'sync_action=check' migrations, but for now
3666                          * we just immediately complete them
3667                          */
3668                 case MIGR_REBUILD:
3669                         /* this is handled by container_content_imsm() */
3670                 case MIGR_STATE_CHANGE:
3671                         /* FIXME handle other migrations */
3672                 default:
3673                         /* we are not dirty, so... */
3674                         info->resync_start = MaxSector;
3675                 }
3676         }
3677
3678         strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
3679         info->name[MAX_RAID_SERIAL_LEN] = 0;
3680
3681         info->array.major_version = -1;
3682         info->array.minor_version = -2;
3683         sprintf(info->text_version, "/%s/%d", st->container_devnm, info->container_member);
3684         info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
3685         uuid_from_super_imsm(st, info->uuid);
3686
3687         if (dmap) {
3688                 int i, j;
3689                 for (i=0; i<map_disks; i++) {
3690                         dmap[i] = 0;
3691                         if (i < info->array.raid_disks) {
3692                                 struct imsm_disk *dsk;
3693                                 j = get_imsm_disk_idx(dev, i, MAP_X);
3694                                 dsk = get_imsm_disk(super, j);
3695                                 if (dsk && (dsk->status & CONFIGURED_DISK))
3696                                         dmap[i] = 1;
3697                         }
3698                 }
3699         }
3700 }
3701
3702 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
3703                                 int failed, int look_in_map);
3704
3705 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
3706                              int look_in_map);
3707
3708 static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
3709 {
3710         if (is_gen_migration(dev)) {
3711                 int failed;
3712                 __u8 map_state;
3713                 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
3714
3715                 failed = imsm_count_failed(super, dev, MAP_1);
3716                 map_state = imsm_check_degraded(super, dev, failed, MAP_1);
3717                 if (map2->map_state != map_state) {
3718                         map2->map_state = map_state;
3719                         super->updates_pending++;
3720                 }
3721         }
3722 }
3723
3724 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
3725 {
3726         struct dl *d;
3727
3728         for (d = super->missing; d; d = d->next)
3729                 if (d->index == index)
3730                         return &d->disk;
3731         return NULL;
3732 }
3733
3734 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
3735 {
3736         struct intel_super *super = st->sb;
3737         struct imsm_disk *disk;
3738         int map_disks = info->array.raid_disks;
3739         int max_enough = -1;
3740         int i;
3741         struct imsm_super *mpb;
3742
3743         if (super->current_vol >= 0) {
3744                 getinfo_super_imsm_volume(st, info, map);
3745                 return;
3746         }
3747         memset(info, 0, sizeof(*info));
3748
3749         /* Set raid_disks to zero so that Assemble will always pull in valid
3750          * spares
3751          */
3752         info->array.raid_disks    = 0;
3753         info->array.level         = LEVEL_CONTAINER;
3754         info->array.layout        = 0;
3755         info->array.md_minor      = -1;
3756         info->array.ctime         = 0; /* N/A for imsm */
3757         info->array.utime         = 0;
3758         info->array.chunk_size    = 0;
3759
3760         info->disk.major = 0;
3761         info->disk.minor = 0;
3762         info->disk.raid_disk = -1;
3763         info->reshape_active = 0;
3764         info->array.major_version = -1;
3765         info->array.minor_version = -2;
3766         strcpy(info->text_version, "imsm");
3767         info->safe_mode_delay = 0;
3768         info->disk.number = -1;
3769         info->disk.state = 0;
3770         info->name[0] = 0;
3771         info->recovery_start = MaxSector;
3772         info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3773         info->bb.supported = 1;
3774
3775         /* do we have the all the insync disks that we expect? */
3776         mpb = super->anchor;
3777         info->events = __le32_to_cpu(mpb->generation_num);
3778
3779         for (i = 0; i < mpb->num_raid_devs; i++) {
3780                 struct imsm_dev *dev = get_imsm_dev(super, i);
3781                 int failed, enough, j, missing = 0;
3782                 struct imsm_map *map;
3783                 __u8 state;
3784
3785                 failed = imsm_count_failed(super, dev, MAP_0);
3786                 state = imsm_check_degraded(super, dev, failed, MAP_0);
3787                 map = get_imsm_map(dev, MAP_0);
3788
3789                 /* any newly missing disks?
3790                  * (catches single-degraded vs double-degraded)
3791                  */
3792                 for (j = 0; j < map->num_members; j++) {
3793                         __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
3794                         __u32 idx = ord_to_idx(ord);
3795
3796                         if (super->disks && super->disks->index == (int)idx)
3797                                 info->disk.raid_disk = j;
3798
3799                         if (!(ord & IMSM_ORD_REBUILD) &&
3800                             get_imsm_missing(super, idx)) {
3801                                 missing = 1;
3802                                 break;
3803                         }
3804                 }
3805
3806                 if (state == IMSM_T_STATE_FAILED)
3807                         enough = -1;
3808                 else if (state == IMSM_T_STATE_DEGRADED &&
3809                          (state != map->map_state || missing))
3810                         enough = 0;
3811                 else /* we're normal, or already degraded */
3812                         enough = 1;
3813                 if (is_gen_migration(dev) && missing) {
3814                         /* during general migration we need all disks
3815                          * that process is running on.
3816                          * No new missing disk is allowed.
3817                          */
3818                         max_enough = -1;
3819                         enough = -1;
3820                         /* no more checks necessary
3821                          */
3822                         break;
3823                 }
3824                 /* in the missing/failed disk case check to see
3825                  * if at least one array is runnable
3826                  */
3827                 max_enough = max(max_enough, enough);
3828         }
3829
3830         info->container_enough = max_enough;
3831
3832         if (super->disks) {
3833                 __u32 reserved = imsm_reserved_sectors(super, super->disks);
3834
3835                 disk = &super->disks->disk;
3836                 info->data_offset = total_blocks(&super->disks->disk) - reserved;
3837                 info->component_size = reserved;
3838                 info->disk.state  = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
3839                 /* we don't change info->disk.raid_disk here because
3840                  * this state will be finalized in mdmon after we have
3841                  * found the 'most fresh' version of the metadata
3842                  */
3843                 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3844                 info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
3845                                     0 : (1 << MD_DISK_SYNC);
3846         }
3847
3848         /* only call uuid_from_super_imsm when this disk is part of a populated container,
3849          * ->compare_super may have updated the 'num_raid_devs' field for spares
3850          */
3851         if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
3852                 uuid_from_super_imsm(st, info->uuid);
3853         else
3854                 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
3855
3856         /* I don't know how to compute 'map' on imsm, so use safe default */
3857         if (map) {
3858                 int i;
3859                 for (i = 0; i < map_disks; i++)
3860                         map[i] = 1;
3861         }
3862
3863 }
3864
3865 /* allocates memory and fills disk in mdinfo structure
3866  * for each disk in array */
3867 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
3868 {
3869         struct mdinfo *mddev;
3870         struct intel_super *super = st->sb;
3871         struct imsm_disk *disk;
3872         int count = 0;
3873         struct dl *dl;
3874         if (!super || !super->disks)
3875                 return NULL;
3876         dl = super->disks;
3877         mddev = xcalloc(1, sizeof(*mddev));
3878         while (dl) {
3879                 struct mdinfo *tmp;
3880                 disk = &dl->disk;
3881                 tmp = xcalloc(1, sizeof(*tmp));
3882                 if (mddev->devs)
3883                         tmp->next = mddev->devs;
3884                 mddev->devs = tmp;
3885                 tmp->disk.number = count++;
3886                 tmp->disk.major = dl->major;
3887                 tmp->disk.minor = dl->minor;
3888                 tmp->disk.state = is_configured(disk) ?
3889                                   (1 << MD_DISK_ACTIVE) : 0;
3890                 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3891                 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
3892                 tmp->disk.raid_disk = -1;
3893                 dl = dl->next;
3894         }
3895         return mddev;
3896 }
3897
3898 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
3899                              enum update_opt update, char *devname,
3900                              int verbose, int uuid_set, char *homehost)
3901 {
3902         /* For 'assemble' and 'force' we need to return non-zero if any
3903          * change was made.  For others, the return value is ignored.
3904          * Update options are:
3905          *  force-one : This device looks a bit old but needs to be included,
3906          *        update age info appropriately.
3907          *  assemble: clear any 'faulty' flag to allow this device to
3908          *              be assembled.
3909          *  force-array: Array is degraded but being forced, mark it clean
3910          *         if that will be needed to assemble it.
3911          *
3912          *  newdev:  not used ????
3913          *  grow:  Array has gained a new device - this is currently for
3914          *              linear only
3915          *  resync: mark as dirty so a resync will happen.
3916          *  name:  update the name - preserving the homehost
3917          *  uuid:  Change the uuid of the array to match watch is given
3918          *
3919          * Following are not relevant for this imsm:
3920          *  sparc2.2 : update from old dodgey metadata
3921          *  super-minor: change the preferred_minor number
3922          *  summaries:  update redundant counters.
3923          *  homehost:  update the recorded homehost
3924          *  _reshape_progress: record new reshape_progress position.
3925          */
3926         int rv = 1;
3927         struct intel_super *super = st->sb;
3928         struct imsm_super *mpb;
3929
3930         /* we can only update container info */
3931         if (!super || super->current_vol >= 0 || !super->anchor)
3932                 return 1;
3933
3934         mpb = super->anchor;
3935
3936         switch (update) {
3937         case UOPT_UUID:
3938                 /* We take this to mean that the family_num should be updated.
3939                  * However that is much smaller than the uuid so we cannot really
3940                  * allow an explicit uuid to be given.  And it is hard to reliably
3941                  * know if one was.
3942                  * So if !uuid_set we know the current uuid is random and just used
3943                  * the first 'int' and copy it to the other 3 positions.
3944                  * Otherwise we require the 4 'int's to be the same as would be the
3945                  * case if we are using a random uuid.  So an explicit uuid will be
3946                  * accepted as long as all for ints are the same... which shouldn't hurt
3947                  */
3948                 if (!uuid_set) {
3949                         info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
3950                         rv = 0;
3951                 } else {
3952                         if (info->uuid[0] != info->uuid[1] ||
3953                             info->uuid[1] != info->uuid[2] ||
3954                             info->uuid[2] != info->uuid[3])
3955                                 rv = -1;
3956                         else
3957                                 rv = 0;
3958                 }
3959                 if (rv == 0)
3960                         mpb->orig_family_num = info->uuid[0];
3961                 break;
3962         case UOPT_SPEC_ASSEMBLE:
3963                 rv = 0;
3964                 break;
3965         default:
3966                 rv = -1;
3967                 break;
3968         }
3969
3970         /* successful update? recompute checksum */
3971         if (rv == 0)
3972                 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
3973
3974         return rv;
3975 }
3976
3977 static size_t disks_to_mpb_size(int disks)
3978 {
3979         size_t size;
3980
3981         size = sizeof(struct imsm_super);
3982         size += (disks - 1) * sizeof(struct imsm_disk);
3983         size += 2 * sizeof(struct imsm_dev);
3984         /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
3985         size += (4 - 2) * sizeof(struct imsm_map);
3986         /* 4 possible disk_ord_tbl's */
3987         size += 4 * (disks - 1) * sizeof(__u32);
3988         /* maximum bbm log */
3989         size += sizeof(struct bbm_log);
3990
3991         return size;
3992 }
3993
3994 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize,
3995                              unsigned long long data_offset)
3996 {
3997         if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
3998                 return 0;
3999
4000         return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
4001 }
4002
4003 static void free_devlist(struct intel_super *super)
4004 {
4005         struct intel_dev *dv;
4006
4007         while (super->devlist) {
4008                 dv = super->devlist->next;
4009                 free(super->devlist->dev);
4010                 free(super->devlist);
4011                 super->devlist = dv;
4012         }
4013 }
4014
/* Copy the raid device record 'src' into 'dest'.  The number of bytes
 * copied is sizeof_imsm_dev(src, 0); 'dest' must be at least that large
 * and must not overlap 'src'.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
4019
4020 static int compare_super_imsm(struct supertype *st, struct supertype *tst,
4021                               int verbose)
4022 {
4023         /*  return:
4024          *  0 same, or first was empty, and second was copied
4025          *  1 sb are different
4026          */
4027         struct intel_super *first = st->sb;
4028         struct intel_super *sec = tst->sb;
4029
4030         if (!first) {
4031                 st->sb = tst->sb;
4032                 tst->sb = NULL;
4033                 return 0;
4034         }
4035
4036         /* in platform dependent environment test if the disks
4037          * use the same Intel hba
4038          * if not on Intel hba at all, allow anything.
4039          * doesn't check HBAs if num_raid_devs is not set, as it means
4040          * it is a free floating spare, and all spares regardless of HBA type
4041          * will fall into separate container during the assembly
4042          */
4043         if (first->hba && sec->hba && first->anchor->num_raid_devs != 0) {
4044                 if (first->hba->type != sec->hba->type) {
4045                         if (verbose)
4046                                 pr_err("HBAs of devices do not match %s != %s\n",
4047                                        get_sys_dev_type(first->hba->type),
4048                                        get_sys_dev_type(sec->hba->type));
4049                         return 1;
4050                 }
4051                 if (first->orom != sec->orom) {
4052                         if (verbose)
4053                                 pr_err("HBAs of devices do not match %s != %s\n",
4054                                        first->hba->pci_id, sec->hba->pci_id);
4055                         return 1;
4056                 }
4057         }
4058
4059         if (first->anchor->num_raid_devs > 0 &&
4060             sec->anchor->num_raid_devs > 0) {
4061                 /* Determine if these disks might ever have been
4062                  * related.  Further disambiguation can only take place
4063                  * in load_super_imsm_all
4064                  */
4065                 __u32 first_family = first->anchor->orig_family_num;
4066                 __u32 sec_family = sec->anchor->orig_family_num;
4067
4068                 if (memcmp(first->anchor->sig, sec->anchor->sig,
4069                            MAX_SIGNATURE_LENGTH) != 0)
4070                         return 1;
4071
4072                 if (first_family == 0)
4073                         first_family = first->anchor->family_num;
4074                 if (sec_family == 0)
4075                         sec_family = sec->anchor->family_num;
4076
4077                 if (first_family != sec_family)
4078                         return 1;
4079
4080         }
4081
4082         /* if an anchor does not have num_raid_devs set then it is a free
4083         * floating spare. don't assosiate spare with any array, as during assembly
4084         * spares shall fall into separate container, from which they can be moved
4085         * when necessary
4086         */
4087         if (first->anchor->num_raid_devs ^ sec->anchor->num_raid_devs)
4088                 return 1;
4089
4090         return 0;
4091 }
4092
4093 static void fd2devname(int fd, char *name)
4094 {
4095         char *nm;
4096
4097         nm = fd2kname(fd);
4098         if (!nm)
4099                 return;
4100
4101         snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
4102 }
4103
4104 static int nvme_get_serial(int fd, void *buf, size_t buf_len)
4105 {
4106         char path[PATH_MAX];
4107         char *name = fd2kname(fd);
4108
4109         if (!name)
4110                 return 1;
4111
4112         if (strncmp(name, "nvme", 4) != 0)
4113                 return 1;
4114
4115         if (!diskfd_to_devpath(fd, 1, path))
4116                 return 1;
4117
4118         return devpath_to_char(path, "serial", buf, buf_len, 0);
4119 }
4120
4121 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
4122
4123 static int imsm_read_serial(int fd, char *devname,
4124                             __u8 *serial, size_t serial_buf_len)
4125 {
4126         char buf[50];
4127         int rv;
4128         size_t len;
4129         char *dest;
4130         char *src;
4131         unsigned int i;
4132
4133         memset(buf, 0, sizeof(buf));
4134
4135         if (check_env("IMSM_DEVNAME_AS_SERIAL")) {
4136                 memset(serial, 0, serial_buf_len);
4137                 fd2devname(fd, (char *) serial);
4138                 return 0;
4139         }
4140
4141         rv = nvme_get_serial(fd, buf, sizeof(buf));
4142
4143         if (rv)
4144                 rv = scsi_get_serial(fd, buf, sizeof(buf));
4145
4146         if (rv != 0) {
4147                 if (devname)
4148                         pr_err("Failed to retrieve serial for %s\n",
4149                                devname);
4150                 return rv;
4151         }
4152
4153         /* trim all whitespace and non-printable characters and convert
4154          * ':' to ';'
4155          */
4156         for (i = 0, dest = buf; i < sizeof(buf) && buf[i]; i++) {
4157                 src = &buf[i];
4158                 if (*src > 0x20) {
4159                         /* ':' is reserved for use in placeholder serial
4160                          * numbers for missing disks
4161                          */
4162                         if (*src == ':')
4163                                 *dest++ = ';';
4164                         else
4165                                 *dest++ = *src;
4166                 }
4167         }
4168         len = dest - buf;
4169         dest = buf;
4170
4171         if (len > serial_buf_len) {
4172                 /* truncate leading characters */
4173                 dest += len - serial_buf_len;
4174                 len = serial_buf_len;
4175         }
4176
4177         memset(serial, 0, serial_buf_len);
4178         memcpy(serial, dest, len);
4179
4180         return 0;
4181 }
4182
4183 static int serialcmp(__u8 *s1, __u8 *s2)
4184 {
4185         return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
4186 }
4187
4188 static void serialcpy(__u8 *dest, __u8 *src)
4189 {
4190         strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
4191 }
4192
4193 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
4194 {
4195         struct dl *dl;
4196
4197         for (dl = super->disks; dl; dl = dl->next)
4198                 if (serialcmp(dl->serial, serial) == 0)
4199                         break;
4200
4201         return dl;
4202 }
4203
4204 static struct imsm_disk *
4205 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
4206 {
4207         int i;
4208
4209         for (i = 0; i < mpb->num_disks; i++) {
4210                 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
4211
4212                 if (serialcmp(disk->serial, serial) == 0) {
4213                         if (idx)
4214                                 *idx = i;
4215                         return disk;
4216                 }
4217         }
4218
4219         return NULL;
4220 }
4221
4222 static int
4223 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
4224 {
4225         struct imsm_disk *disk;
4226         struct dl *dl;
4227         struct stat stb;
4228         int rv;
4229         char name[40];
4230         __u8 serial[MAX_RAID_SERIAL_LEN];
4231
4232         rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN);
4233
4234         if (rv != 0)
4235                 return 2;
4236
4237         dl = xcalloc(1, sizeof(*dl));
4238
4239         fstat(fd, &stb);
4240         dl->major = major(stb.st_rdev);
4241         dl->minor = minor(stb.st_rdev);
4242         dl->next = super->disks;
4243         dl->fd = keep_fd ? fd : -1;
4244         assert(super->disks == NULL);
4245         super->disks = dl;
4246         serialcpy(dl->serial, serial);
4247         dl->index = -2;
4248         dl->e = NULL;
4249         fd2devname(fd, name);
4250         if (devname)
4251                 dl->devname = xstrdup(devname);
4252         else
4253                 dl->devname = xstrdup(name);
4254
4255         /* look up this disk's index in the current anchor */
4256         disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
4257         if (disk) {
4258                 dl->disk = *disk;
4259                 /* only set index on disks that are a member of a
4260                  * populated contianer, i.e. one with raid_devs
4261                  */
4262                 if (is_failed(&dl->disk))
4263                         dl->index = -2;
4264                 else if (is_spare(&dl->disk) || is_journal(&dl->disk))
4265                         dl->index = -1;
4266         }
4267
4268         return 0;
4269 }
4270
4271 /* When migrating map0 contains the 'destination' state while map1
4272  * contains the current state.  When not migrating map0 contains the
4273  * current state.  This routine assumes that map[0].map_state is set to
4274  * the current array state before being called.
4275  *
4276  * Migration is indicated by one of the following states
4277  * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
4278  * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
4279  *    map1state=unitialized)
4280  * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR  map0state=normal
4281  *    map1state=normal)
4282  * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
4283  *    map1state=degraded)
4284  * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
4285  *    map1state=normal)
4286  */
4287 static void migrate(struct imsm_dev *dev, struct intel_super *super,
4288                     __u8 to_state, int migr_type)
4289 {
4290         struct imsm_map *dest;
4291         struct imsm_map *src = get_imsm_map(dev, MAP_0);
4292
4293         dev->vol.migr_state = 1;
4294         set_migr_type(dev, migr_type);
4295         set_vol_curr_migr_unit(dev, 0);
4296         dest = get_imsm_map(dev, MAP_1);
4297
4298         /* duplicate and then set the target end state in map[0] */
4299         memcpy(dest, src, sizeof_imsm_map(src));
4300         if (migr_type == MIGR_GEN_MIGR) {
4301                 __u32 ord;
4302                 int i;
4303
4304                 for (i = 0; i < src->num_members; i++) {
4305                         ord = __le32_to_cpu(src->disk_ord_tbl[i]);
4306                         set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
4307                 }
4308         }
4309
4310         if (migr_type == MIGR_GEN_MIGR)
4311                 /* Clear migration record */
4312                 memset(super->migr_rec, 0, sizeof(struct migr_record));
4313
4314         src->map_state = to_state;
4315 }
4316
4317 static void end_migration(struct imsm_dev *dev, struct intel_super *super,
4318                           __u8 map_state)
4319 {
4320         struct imsm_map *map = get_imsm_map(dev, MAP_0);
4321         struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state == 0 ?
4322                                                     MAP_0 : MAP_1);
4323         int i, j;
4324
4325         /* merge any IMSM_ORD_REBUILD bits that were not successfully
4326          * completed in the last migration.
4327          *
4328          * FIXME add support for raid-level-migration
4329          */
4330         if (map_state != map->map_state && (is_gen_migration(dev) == false) &&
4331             prev->map_state != IMSM_T_STATE_UNINITIALIZED) {
4332                 /* when final map state is other than expected
4333                  * merge maps (not for migration)
4334                  */
4335                 int failed;
4336
4337                 for (i = 0; i < prev->num_members; i++)
4338                         for (j = 0; j < map->num_members; j++)
4339                                 /* during online capacity expansion
4340                                  * disks position can be changed
4341                                  * if takeover is used
4342                                  */
4343                                 if (ord_to_idx(map->disk_ord_tbl[j]) ==
4344                                     ord_to_idx(prev->disk_ord_tbl[i])) {
4345                                         map->disk_ord_tbl[j] |=
4346                                                 prev->disk_ord_tbl[i];
4347                                         break;
4348                                 }
4349                 failed = imsm_count_failed(super, dev, MAP_0);
4350                 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
4351         }
4352
4353         dev->vol.migr_state = 0;
4354         set_migr_type(dev, 0);
4355         set_vol_curr_migr_unit(dev, 0);
4356         map->map_state = map_state;
4357 }
4358
4359 static int parse_raid_devices(struct intel_super *super)
4360 {
4361         int i;
4362         struct imsm_dev *dev_new;
4363         size_t len, len_migr;
4364         size_t max_len = 0;
4365         size_t space_needed = 0;
4366         struct imsm_super *mpb = super->anchor;
4367
4368         for (i = 0; i < super->anchor->num_raid_devs; i++) {
4369                 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
4370                 struct intel_dev *dv;
4371
4372                 len = sizeof_imsm_dev(dev_iter, 0);
4373                 len_migr = sizeof_imsm_dev(dev_iter, 1);
4374                 if (len_migr > len)
4375                         space_needed += len_migr - len;
4376
4377                 dv = xmalloc(sizeof(*dv));
4378                 if (max_len < len_migr)
4379                         max_len = len_migr;
4380                 if (max_len > len_migr)
4381                         space_needed += max_len - len_migr;
4382                 dev_new = xmalloc(max_len);
4383                 imsm_copy_dev(dev_new, dev_iter);
4384                 dv->dev = dev_new;
4385                 dv->index = i;
4386                 dv->next = super->devlist;
4387                 super->devlist = dv;
4388         }
4389
4390         /* ensure that super->buf is large enough when all raid devices
4391          * are migrating
4392          */
4393         if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
4394                 void *buf;
4395
4396                 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed,
4397                               super->sector_size);
4398                 if (posix_memalign(&buf, MAX_SECTOR_SIZE, len) != 0)
4399                         return 1;
4400
4401                 memcpy(buf, super->buf, super->len);
4402                 memset(buf + super->len, 0, len - super->len);
4403                 free(super->buf);
4404                 super->buf = buf;
4405                 super->len = len;
4406         }
4407
4408         super->extra_space += space_needed;
4409
4410         return 0;
4411 }
4412
4413 /*******************************************************************************
4414  * Function:    check_mpb_migr_compatibility
4415  * Description: Function checks for unsupported migration features:
4416  *              - migration optimization area (pba_of_lba0)
4417  *              - descending reshape (ascending_migr)
4418  * Parameters:
4419  *      super   : imsm metadata information
4420  * Returns:
4421  *       0 : migration is compatible
4422  *      -1 : migration is not compatible
4423  ******************************************************************************/
4424 int check_mpb_migr_compatibility(struct intel_super *super)
4425 {
4426         struct imsm_map *map0, *map1;
4427         struct migr_record *migr_rec = super->migr_rec;
4428         int i;
4429
4430         for (i = 0; i < super->anchor->num_raid_devs; i++) {
4431                 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
4432
4433                 if (dev_iter->vol.migr_state == 1 &&
4434                     dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
4435                         /* This device is migrating */
4436                         map0 = get_imsm_map(dev_iter, MAP_0);
4437                         map1 = get_imsm_map(dev_iter, MAP_1);
4438                         if (pba_of_lba0(map0) != pba_of_lba0(map1))
4439                                 /* migration optimization area was used */
4440                                 return -1;
4441                         if (migr_rec->ascending_migr == 0 &&
4442                             migr_rec->dest_depth_per_unit > 0)
4443                                 /* descending reshape not supported yet */
4444                                 return -1;
4445                 }
4446         }
4447         return 0;
4448 }
4449
4450 static void __free_imsm(struct intel_super *super, int free_disks);
4451
4452 /* load_imsm_mpb - read matrix metadata
4453  * allocates super->mpb to be freed by free_imsm
4454  */
4455 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
4456 {
4457         unsigned long long dsize;
4458         unsigned long long sectors;
4459         unsigned int sector_size = super->sector_size;
4460         struct stat;
4461         struct imsm_super *anchor;
4462         __u32 check_sum;
4463
4464         get_dev_size(fd, NULL, &dsize);
4465         if (dsize < 2*sector_size) {
4466                 if (devname)
4467                         pr_err("%s: device to small for imsm\n",
4468                                devname);
4469                 return 1;
4470         }
4471
4472         if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0) {
4473                 if (devname)
4474                         pr_err("Cannot seek to anchor block on %s: %s\n",
4475                                devname, strerror(errno));
4476                 return 1;
4477         }
4478
4479         if (posix_memalign((void **)&anchor, sector_size, sector_size) != 0) {
4480                 if (devname)
4481                         pr_err("Failed to allocate imsm anchor buffer on %s\n", devname);
4482                 return 1;
4483         }
4484         if ((unsigned int)read(fd, anchor, sector_size) != sector_size) {
4485                 if (devname)
4486                         pr_err("Cannot read anchor block on %s: %s\n",
4487                                devname, strerror(errno));
4488                 free(anchor);
4489                 return 1;
4490         }
4491
4492         if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
4493                 if (devname)
4494                         pr_err("no IMSM anchor on %s\n", devname);
4495                 free(anchor);
4496                 return 2;
4497         }
4498
4499         __free_imsm(super, 0);
4500         /*  reload capability and hba */
4501
4502         /* capability and hba must be updated with new super allocation */
4503         find_intel_hba_capability(fd, super, devname);
4504         super->len = ROUND_UP(anchor->mpb_size, sector_size);
4505         if (posix_memalign(&super->buf, MAX_SECTOR_SIZE, super->len) != 0) {
4506                 if (devname)
4507                         pr_err("unable to allocate %zu byte mpb buffer\n",
4508                                super->len);
4509                 free(anchor);
4510                 return 2;
4511         }
4512         memcpy(super->buf, anchor, sector_size);
4513
4514         sectors = mpb_sectors(anchor, sector_size) - 1;
4515         free(anchor);
4516
4517         if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
4518             MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
4519                 pr_err("could not allocate migr_rec buffer\n");
4520                 free(super->buf);
4521                 super->buf = NULL;
4522                 return 2;
4523         }
4524         super->clean_migration_record_by_mdmon = 0;
4525
4526         if (!sectors) {
4527                 check_sum = __gen_imsm_checksum(super->anchor);
4528                 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
4529                         if (devname)
4530                                 pr_err("IMSM checksum %x != %x on %s\n",
4531                                        check_sum,
4532                                        __le32_to_cpu(super->anchor->check_sum),
4533                                        devname);
4534                         return 2;
4535                 }
4536
4537                 return 0;
4538         }
4539
4540         /* read the extended mpb */
4541         if (lseek64(fd, dsize - (sector_size * (2 + sectors)), SEEK_SET) < 0) {
4542                 if (devname)
4543                         pr_err("Cannot seek to extended mpb on %s: %s\n",
4544                                devname, strerror(errno));
4545                 return 1;
4546         }
4547
4548         if ((unsigned int)read(fd, super->buf + sector_size,
4549                     super->len - sector_size) != super->len - sector_size) {
4550                 if (devname)
4551                         pr_err("Cannot read extended mpb on %s: %s\n",
4552                                devname, strerror(errno));
4553                 return 2;
4554         }
4555
4556         check_sum = __gen_imsm_checksum(super->anchor);
4557         if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
4558                 if (devname)
4559                         pr_err("IMSM checksum %x != %x on %s\n",
4560                                check_sum, __le32_to_cpu(super->anchor->check_sum),
4561                                devname);
4562                 return 3;
4563         }
4564
4565         return 0;
4566 }
4567
4568 static int read_imsm_migr_rec(int fd, struct intel_super *super);
4569
4570 /* clears hi bits in metadata if MPB_ATTRIB_2TB_DISK not set */
4571 static void clear_hi(struct intel_super *super)
4572 {
4573         struct imsm_super *mpb = super->anchor;
4574         int i, n;
4575         if (mpb->attributes & MPB_ATTRIB_2TB_DISK)
4576                 return;
4577         for (i = 0; i < mpb->num_disks; ++i) {
4578                 struct imsm_disk *disk = &mpb->disk[i];
4579                 disk->total_blocks_hi = 0;
4580         }
4581         for (i = 0; i < mpb->num_raid_devs; ++i) {
4582                 struct imsm_dev *dev = get_imsm_dev(super, i);
4583                 for (n = 0; n < 2; ++n) {
4584                         struct imsm_map *map = get_imsm_map(dev, n);
4585                         if (!map)
4586                                 continue;
4587                         map->pba_of_lba0_hi = 0;
4588                         map->blocks_per_member_hi = 0;
4589                         map->num_data_stripes_hi = 0;
4590                 }
4591         }
4592 }
4593
4594 static int
4595 load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
4596 {
4597         int err;
4598
4599         err = load_imsm_mpb(fd, super, devname);
4600         if (err)
4601                 return err;
4602         if (super->sector_size == 4096)
4603                 convert_from_4k(super);
4604         err = load_imsm_disk(fd, super, devname, keep_fd);
4605         if (err)
4606                 return err;
4607         err = parse_raid_devices(super);
4608         if (err)
4609                 return err;
4610         err = load_bbm_log(super);
4611         clear_hi(super);
4612         return err;
4613 }
4614
4615 static void __free_imsm_disk(struct dl *d, int do_close)
4616 {
4617         if (do_close)
4618                 close_fd(&d->fd);
4619         if (d->devname)
4620                 free(d->devname);
4621         if (d->e)
4622                 free(d->e);
4623         free(d);
4624
4625 }
4626
4627 static void free_imsm_disks(struct intel_super *super)
4628 {
4629         struct dl *d;
4630
4631         while (super->disks) {
4632                 d = super->disks;
4633                 super->disks = d->next;
4634                 __free_imsm_disk(d, 1);
4635         }
4636         while (super->disk_mgmt_list) {
4637                 d = super->disk_mgmt_list;
4638                 super->disk_mgmt_list = d->next;
4639                 __free_imsm_disk(d, 1);
4640         }
4641         while (super->missing) {
4642                 d = super->missing;
4643                 super->missing = d->next;
4644                 __free_imsm_disk(d, 1);
4645         }
4646
4647 }
4648
4649 /* free all the pieces hanging off of a super pointer */
4650 static void __free_imsm(struct intel_super *super, int free_disks)
4651 {
4652         struct intel_hba *elem, *next;
4653
4654         if (super->buf) {
4655                 free(super->buf);
4656                 super->buf = NULL;
4657         }
4658         /* unlink capability description */
4659         super->orom = NULL;
4660         if (super->migr_rec_buf) {
4661                 free(super->migr_rec_buf);
4662                 super->migr_rec_buf = NULL;
4663         }
4664         if (free_disks)
4665                 free_imsm_disks(super);
4666         free_devlist(super);
4667         elem = super->hba;
4668         while (elem) {
4669                 if (elem->path)
4670                         free((void *)elem->path);
4671                 next = elem->next;
4672                 free(elem);
4673                 elem = next;
4674         }
4675         if (super->bbm_log)
4676                 free(super->bbm_log);
4677         super->hba = NULL;
4678 }
4679
4680 static void free_imsm(struct intel_super *super)
4681 {
4682         __free_imsm(super, 1);
4683         free(super->bb.entries);
4684         free(super);
4685 }
4686
4687 static void free_super_imsm(struct supertype *st)
4688 {
4689         struct intel_super *super = st->sb;
4690
4691         if (!super)
4692                 return;
4693
4694         free_imsm(super);
4695         st->sb = NULL;
4696 }
4697
4698 static struct intel_super *alloc_super(void)
4699 {
4700         struct intel_super *super = xcalloc(1, sizeof(*super));
4701
4702         super->current_vol = -1;
4703         super->create_offset = ~((unsigned long long) 0);
4704
4705         super->bb.entries = xmalloc(BBM_LOG_MAX_ENTRIES *
4706                                    sizeof(struct md_bb_entry));
4707         if (!super->bb.entries) {
4708                 free(super);
4709                 return NULL;
4710         }
4711
4712         return super;
4713 }
4714
4715 /*
4716  * find and allocate hba and OROM/EFI based on valid fd of RAID component device
4717  */
4718 static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
4719 {
4720         struct sys_dev *hba_name;
4721         int rv = 0;
4722
4723         if (is_fd_valid(fd) && test_partition(fd)) {
4724                 pr_err("imsm: %s is a partition, cannot be used in IMSM\n",
4725                        devname);
4726                 return 1;
4727         }
4728         if (!is_fd_valid(fd) || check_no_platform()) {
4729                 super->orom = NULL;
4730                 super->hba = NULL;
4731                 return 0;
4732         }
4733         hba_name = find_disk_attached_hba(fd, NULL);
4734         if (!hba_name) {
4735                 if (devname)
4736                         pr_err("%s is not attached to Intel(R) RAID controller.\n",
4737                                devname);
4738                 return 1;
4739         }
4740         rv = attach_hba_to_super(super, hba_name);
4741         if (rv == 2) {
4742                 if (devname) {
4743                         struct intel_hba *hba = super->hba;
4744
4745                         pr_err("%s is attached to Intel(R) %s %s (%s),\n"
4746                                 "    but the container is assigned to Intel(R) %s %s (",
4747                                 devname,
4748                                 get_sys_dev_type(hba_name->type),
4749                                 hba_name->type == SYS_DEV_VMD || hba_name->type == SYS_DEV_SATA_VMD ?
4750                                         "domain" : "RAID controller",
4751                                 hba_name->pci_id ? : "Err!",
4752                                 get_sys_dev_type(super->hba->type),
4753                                 hba->type == SYS_DEV_VMD || hba_name->type == SYS_DEV_SATA_VMD ?
4754                                         "domain" : "RAID controller");
4755
4756                         while (hba) {
4757                                 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
4758                                 if (hba->next)
4759                                         fprintf(stderr, ", ");
4760                                 hba = hba->next;
4761                         }
4762                         fprintf(stderr, ").\n"
4763                                 "    Mixing devices attached to different controllers is not allowed.\n");
4764                 }
4765                 return 2;
4766         }
4767         super->orom = find_imsm_capability(hba_name);
4768         if (!super->orom)
4769                 return 3;
4770
4771         return 0;
4772 }
4773
4774 /* find_missing - helper routine for load_super_imsm_all that identifies
4775  * disks that have disappeared from the system.  This routine relies on
4776  * the mpb being uptodate, which it is at load time.
4777  */
4778 static int find_missing(struct intel_super *super)
4779 {
4780         int i;
4781         struct imsm_super *mpb = super->anchor;
4782         struct dl *dl;
4783         struct imsm_disk *disk;
4784
4785         for (i = 0; i < mpb->num_disks; i++) {
4786                 disk = __get_imsm_disk(mpb, i);
4787                 dl = serial_to_dl(disk->serial, super);
4788                 if (dl)
4789                         continue;
4790
4791                 dl = xmalloc(sizeof(*dl));
4792                 dl->major = 0;
4793                 dl->minor = 0;
4794                 dl->fd = -1;
4795                 dl->devname = xstrdup("missing");
4796                 dl->index = i;
4797                 serialcpy(dl->serial, disk->serial);
4798                 dl->disk = *disk;
4799                 dl->e = NULL;
4800                 dl->next = super->missing;
4801                 super->missing = dl;
4802         }
4803
4804         return 0;
4805 }
4806
4807 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
4808 {
4809         struct intel_disk *idisk = disk_list;
4810
4811         while (idisk) {
4812                 if (serialcmp(idisk->disk.serial, serial) == 0)
4813                         break;
4814                 idisk = idisk->next;
4815         }
4816
4817         return idisk;
4818 }
4819
4820 static int __prep_thunderdome(struct intel_super **table, int tbl_size,
4821                               struct intel_super *super,
4822                               struct intel_disk **disk_list)
4823 {
4824         struct imsm_disk *d = &super->disks->disk;
4825         struct imsm_super *mpb = super->anchor;
4826         int i, j;
4827
4828         for (i = 0; i < tbl_size; i++) {
4829                 struct imsm_super *tbl_mpb = table[i]->anchor;
4830                 struct imsm_disk *tbl_d = &table[i]->disks->disk;
4831
4832                 if (tbl_mpb->family_num == mpb->family_num) {
4833                         if (tbl_mpb->check_sum == mpb->check_sum) {
4834                                 dprintf("mpb from %d:%d matches %d:%d\n",
4835                                         super->disks->major,
4836                                         super->disks->minor,
4837                                         table[i]->disks->major,
4838                                         table[i]->disks->minor);
4839                                 break;
4840                         }
4841
4842                         if (((is_configured(d) && !is_configured(tbl_d)) ||
4843                              is_configured(d) == is_configured(tbl_d)) &&
4844                             tbl_mpb->generation_num < mpb->generation_num) {
4845                                 /* current version of the mpb is a
4846                                  * better candidate than the one in
4847                                  * super_table, but copy over "cross
4848                                  * generational" status
4849                                  */
4850                                 struct intel_disk *idisk;
4851
4852                                 dprintf("mpb from %d:%d replaces %d:%d\n",
4853                                         super->disks->major,
4854                                         super->disks->minor,
4855                                         table[i]->disks->major,
4856                                         table[i]->disks->minor);
4857
4858                                 idisk = disk_list_get(tbl_d->serial, *disk_list);
4859                                 if (idisk && is_failed(&idisk->disk))
4860                                         tbl_d->status |= FAILED_DISK;
4861                                 break;
4862                         } else {
4863                                 struct intel_disk *idisk;
4864                                 struct imsm_disk *disk;
4865
4866                                 /* tbl_mpb is more up to date, but copy
4867                                  * over cross generational status before
4868                                  * returning
4869                                  */
4870                                 disk = __serial_to_disk(d->serial, mpb, NULL);
4871                                 if (disk && is_failed(disk))
4872                                         d->status |= FAILED_DISK;
4873
4874                                 idisk = disk_list_get(d->serial, *disk_list);
4875                                 if (idisk) {
4876                                         idisk->owner = i;
4877                                         if (disk && is_configured(disk))
4878                                                 idisk->disk.status |= CONFIGURED_DISK;
4879                                 }
4880
4881                                 dprintf("mpb from %d:%d prefer %d:%d\n",
4882                                         super->disks->major,
4883                                         super->disks->minor,
4884                                         table[i]->disks->major,
4885                                         table[i]->disks->minor);
4886
4887                                 return tbl_size;
4888                         }
4889                 }
4890         }
4891
4892         if (i >= tbl_size)
4893                 table[tbl_size++] = super;
4894         else
4895                 table[i] = super;
4896
4897         /* update/extend the merged list of imsm_disk records */
4898         for (j = 0; j < mpb->num_disks; j++) {
4899                 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
4900                 struct intel_disk *idisk;
4901
4902                 idisk = disk_list_get(disk->serial, *disk_list);
4903                 if (idisk) {
4904                         idisk->disk.status |= disk->status;
4905                         if (is_configured(&idisk->disk) ||
4906                             is_failed(&idisk->disk))
4907                                 idisk->disk.status &= ~(SPARE_DISK);
4908                 } else {
4909                         idisk = xcalloc(1, sizeof(*idisk));
4910                         idisk->owner = IMSM_UNKNOWN_OWNER;
4911                         idisk->disk = *disk;
4912                         idisk->next = *disk_list;
4913                         *disk_list = idisk;
4914                 }
4915
4916                 if (serialcmp(idisk->disk.serial, d->serial) == 0)
4917                         idisk->owner = i;
4918         }
4919
4920         return tbl_size;
4921 }
4922
4923 static struct intel_super *
4924 validate_members(struct intel_super *super, struct intel_disk *disk_list,
4925                  const int owner)
4926 {
4927         struct imsm_super *mpb = super->anchor;
4928         int ok_count = 0;
4929         int i;
4930
4931         for (i = 0; i < mpb->num_disks; i++) {
4932                 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
4933                 struct intel_disk *idisk;
4934
4935                 idisk = disk_list_get(disk->serial, disk_list);
4936                 if (idisk) {
4937                         if (idisk->owner == owner ||
4938                             idisk->owner == IMSM_UNKNOWN_OWNER)
4939                                 ok_count++;
4940                         else
4941                                 dprintf("'%.16s' owner %d != %d\n",
4942                                         disk->serial, idisk->owner,
4943                                         owner);
4944                 } else {
4945                         dprintf("unknown disk %x [%d]: %.16s\n",
4946                                 __le32_to_cpu(mpb->family_num), i,
4947                                 disk->serial);
4948                         break;
4949                 }
4950         }
4951
4952         if (ok_count == mpb->num_disks)
4953                 return super;
4954         return NULL;
4955 }
4956
4957 static void show_conflicts(__u32 family_num, struct intel_super *super_list)
4958 {
4959         struct intel_super *s;
4960
4961         for (s = super_list; s; s = s->next) {
4962                 if (family_num != s->anchor->family_num)
4963                         continue;
4964                 pr_err("Conflict, offlining family %#x on '%s'\n",
4965                         __le32_to_cpu(family_num), s->disks->devname);
4966         }
4967 }
4968
4969 static struct intel_super *
4970 imsm_thunderdome(struct intel_super **super_list, int len)
4971 {
4972         struct intel_super *super_table[len];
4973         struct intel_disk *disk_list = NULL;
4974         struct intel_super *champion, *spare;
4975         struct intel_super *s, **del;
4976         int tbl_size = 0;
4977         int conflict;
4978         int i;
4979
4980         memset(super_table, 0, sizeof(super_table));
4981         for (s = *super_list; s; s = s->next)
4982                 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
4983
4984         for (i = 0; i < tbl_size; i++) {
4985                 struct imsm_disk *d;
4986                 struct intel_disk *idisk;
4987                 struct imsm_super *mpb = super_table[i]->anchor;
4988
4989                 s = super_table[i];
4990                 d = &s->disks->disk;
4991
4992                 /* 'd' must appear in merged disk list for its
4993                  * configuration to be valid
4994                  */
4995                 idisk = disk_list_get(d->serial, disk_list);
4996                 if (idisk && idisk->owner == i)
4997                         s = validate_members(s, disk_list, i);
4998                 else
4999                         s = NULL;
5000
5001                 if (!s)
5002                         dprintf("marking family: %#x from %d:%d offline\n",
5003                                 mpb->family_num,
5004                                 super_table[i]->disks->major,
5005                                 super_table[i]->disks->minor);
5006                 super_table[i] = s;
5007         }
5008
5009         /* This is where the mdadm implementation differs from the Windows
5010          * driver which has no strict concept of a container.  We can only
5011          * assemble one family from a container, so when returning a prodigal
5012          * array member to this system the code will not be able to disambiguate
5013          * the container contents that should be assembled ("foreign" versus
5014          * "local").  It requires user intervention to set the orig_family_num
5015          * to a new value to establish a new container.  The Windows driver in
5016          * this situation fixes up the volume name in place and manages the
5017          * foreign array as an independent entity.
5018          */
5019         s = NULL;
5020         spare = NULL;
5021         conflict = 0;
5022         for (i = 0; i < tbl_size; i++) {
5023                 struct intel_super *tbl_ent = super_table[i];
5024                 int is_spare = 0;
5025
5026                 if (!tbl_ent)
5027                         continue;
5028
5029                 if (tbl_ent->anchor->num_raid_devs == 0) {
5030                         spare = tbl_ent;
5031                         is_spare = 1;
5032                 }
5033
5034                 if (s && !is_spare) {
5035                         show_conflicts(tbl_ent->anchor->family_num, *super_list);
5036                         conflict++;
5037                 } else if (!s && !is_spare)
5038                         s = tbl_ent;
5039         }
5040
5041         if (!s)
5042                 s = spare;
5043         if (!s) {
5044                 champion = NULL;
5045                 goto out;
5046         }
5047         champion = s;
5048
5049         if (conflict)
5050                 pr_err("Chose family %#x on '%s', assemble conflicts to new container with '--update=uuid'\n",
5051                         __le32_to_cpu(s->anchor->family_num), s->disks->devname);
5052
5053         /* collect all dl's onto 'champion', and update them to
5054          * champion's version of the status
5055          */
5056         for (s = *super_list; s; s = s->next) {
5057                 struct imsm_super *mpb = champion->anchor;
5058                 struct dl *dl = s->disks;
5059
5060                 if (s == champion)
5061                         continue;
5062
5063                 mpb->attributes |= s->anchor->attributes & MPB_ATTRIB_2TB_DISK;
5064
5065                 for (i = 0; i < mpb->num_disks; i++) {
5066                         struct imsm_disk *disk;
5067
5068                         disk = __serial_to_disk(dl->serial, mpb, &dl->index);
5069                         if (disk) {
5070                                 dl->disk = *disk;
5071                                 /* only set index on disks that are a member of
5072                                  * a populated contianer, i.e. one with
5073                                  * raid_devs
5074                                  */
5075                                 if (is_failed(&dl->disk))
5076                                         dl->index = -2;
5077                                 else if (is_spare(&dl->disk))
5078                                         dl->index = -1;
5079                                 break;
5080                         }
5081                 }
5082
5083                 if (i >= mpb->num_disks) {
5084                         struct intel_disk *idisk;
5085
5086                         idisk = disk_list_get(dl->serial, disk_list);
5087                         if (idisk && is_spare(&idisk->disk) &&
5088                             !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
5089                                 dl->index = -1;
5090                         else {
5091                                 dl->index = -2;
5092                                 continue;
5093                         }
5094                 }
5095
5096                 dl->next = champion->disks;
5097                 champion->disks = dl;
5098                 s->disks = NULL;
5099         }
5100
5101         /* delete 'champion' from super_list */
5102         for (del = super_list; *del; ) {
5103                 if (*del == champion) {
5104                         *del = (*del)->next;
5105                         break;
5106                 } else
5107                         del = &(*del)->next;
5108         }
5109         champion->next = NULL;
5110
5111  out:
5112         while (disk_list) {
5113                 struct intel_disk *idisk = disk_list;
5114
5115                 disk_list = disk_list->next;
5116                 free(idisk);
5117         }
5118
5119         return champion;
5120 }
5121
5122 static int
5123 get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd);
5124 static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
5125                            int major, int minor, int keep_fd);
5126 static int
5127 get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
5128                         int *max, int keep_fd);
5129
5130 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
5131                                char *devname, struct md_list *devlist,
5132                                int keep_fd)
5133 {
5134         struct intel_super *super_list = NULL;
5135         struct intel_super *super = NULL;
5136         int err = 0;
5137         int i = 0;
5138
5139         if (is_fd_valid(fd))
5140                 /* 'fd' is an opened container */
5141                 err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd);
5142         else
5143                 /* get super block from devlist devices */
5144                 err = get_devlist_super_block(devlist, &super_list, &i, keep_fd);
5145         if (err)
5146                 goto error;
5147         /* all mpbs enter, maybe one leaves */
5148         super = imsm_thunderdome(&super_list, i);
5149         if (!super) {
5150                 err = 1;
5151                 goto error;
5152         }
5153
5154         if (find_missing(super) != 0) {
5155                 free_imsm(super);
5156                 err = 2;
5157                 goto error;
5158         }
5159
5160         /* load migration record */
5161         err = load_imsm_migr_rec(super);
5162         if (err == -1) {
5163                 /* migration is in progress,
5164                  * but migr_rec cannot be loaded,
5165                  */
5166                 err = 4;
5167                 goto error;
5168         }
5169
5170         /* Check migration compatibility */
5171         if (err == 0 && check_mpb_migr_compatibility(super) != 0) {
5172                 pr_err("Unsupported migration detected");
5173                 if (devname)
5174                         fprintf(stderr, " on %s\n", devname);
5175                 else
5176                         fprintf(stderr, " (IMSM).\n");
5177
5178                 err = 5;
5179                 goto error;
5180         }
5181
5182         err = 0;
5183
5184  error:
5185         while (super_list) {
5186                 struct intel_super *s = super_list;
5187
5188                 super_list = super_list->next;
5189                 free_imsm(s);
5190         }
5191
5192         if (err)
5193                 return err;
5194
5195         *sbp = super;
5196         if (is_fd_valid(fd))
5197                 strcpy(st->container_devnm, fd2devnm(fd));
5198         else
5199                 st->container_devnm[0] = 0;
5200         if (err == 0 && st->ss == NULL) {
5201                 st->ss = &super_imsm;
5202                 st->minor_version = 0;
5203                 st->max_devs = IMSM_MAX_DEVICES;
5204         }
5205         return 0;
5206 }
5207
5208 static int
5209 get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
5210                         int *max, int keep_fd)
5211 {
5212         struct md_list *tmpdev;
5213         int err = 0;
5214         int i = 0;
5215
5216         for (i = 0, tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
5217                 if (tmpdev->used != 1)
5218                         continue;
5219                 if (tmpdev->container == 1) {
5220                         int lmax = 0;
5221                         int fd = dev_open(tmpdev->devname, O_RDONLY|O_EXCL);
5222                         if (!is_fd_valid(fd)) {
5223                                 pr_err("cannot open device %s: %s\n",
5224                                         tmpdev->devname, strerror(errno));
5225                                 err = 8;
5226                                 goto error;
5227                         }
5228                         err = get_sra_super_block(fd, super_list,
5229                                                   tmpdev->devname, &lmax,
5230                                                   keep_fd);
5231                         i += lmax;
5232                         close(fd);
5233                         if (err) {
5234                                 err = 7;
5235                                 goto error;
5236                         }
5237                 } else {
5238                         int major = major(tmpdev->st_rdev);
5239                         int minor = minor(tmpdev->st_rdev);
5240                         err = get_super_block(super_list,
5241                                               NULL,
5242                                               tmpdev->devname,
5243                                               major, minor,
5244                                               keep_fd);
5245                         i++;
5246                         if (err) {
5247                                 err = 6;
5248                                 goto error;
5249                         }
5250                 }
5251         }
5252  error:
5253         *max = i;
5254         return err;
5255 }
5256
5257 static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
5258                            int major, int minor, int keep_fd)
5259 {
5260         struct intel_super *s;
5261         char nm[32];
5262         int dfd = -1;
5263         int err = 0;
5264         int retry;
5265
5266         s = alloc_super();
5267         if (!s) {
5268                 err = 1;
5269                 goto error;
5270         }
5271
5272         sprintf(nm, "%d:%d", major, minor);
5273         dfd = dev_open(nm, O_RDWR);
5274         if (!is_fd_valid(dfd)) {
5275                 err = 2;
5276                 goto error;
5277         }
5278
5279         if (!get_dev_sector_size(dfd, NULL, &s->sector_size)) {
5280                 err = 2;
5281                 goto error;
5282         }
5283         find_intel_hba_capability(dfd, s, devname);
5284         err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
5285
5286         /* retry the load if we might have raced against mdmon */
5287         if (err == 3 && devnm && mdmon_running(devnm))
5288                 for (retry = 0; retry < 3; retry++) {
5289                         sleep_for(0, MSEC_TO_NSEC(3), true);
5290                         err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
5291                         if (err != 3)
5292                                 break;
5293                 }
5294  error:
5295         if (!err) {
5296                 s->next = *super_list;
5297                 *super_list = s;
5298         } else {
5299                 if (s)
5300                         free_imsm(s);
5301                 close_fd(&dfd);
5302         }
5303         if (!keep_fd)
5304                 close_fd(&dfd);
5305         return err;
5306
5307 }
5308
5309 static int
5310 get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd)
5311 {
5312         struct mdinfo *sra;
5313         char *devnm;
5314         struct mdinfo *sd;
5315         int err = 0;
5316         int i = 0;
5317         sra = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
5318         if (!sra)
5319                 return 1;
5320
5321         if (sra->array.major_version != -1 ||
5322             sra->array.minor_version != -2 ||
5323             strcmp(sra->text_version, "imsm") != 0) {
5324                 err = 1;
5325                 goto error;
5326         }
5327         /* load all mpbs */
5328         devnm = fd2devnm(fd);
5329         for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
5330                 if (get_super_block(super_list, devnm, devname,
5331                                     sd->disk.major, sd->disk.minor, keep_fd) != 0) {
5332                         err = 7;
5333                         goto error;
5334                 }
5335         }
5336  error:
5337         sysfs_free(sra);
5338         *max = i;
5339         return err;
5340 }
5341
5342 static int load_container_imsm(struct supertype *st, int fd, char *devname)
5343 {
5344         return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
5345 }
5346
5347 static int load_super_imsm(struct supertype *st, int fd, char *devname)
5348 {
5349         struct intel_super *super;
5350         int rv;
5351         int retry;
5352
5353         if (test_partition(fd))
5354                 /* IMSM not allowed on partitions */
5355                 return 1;
5356
5357         free_super_imsm(st);
5358
5359         super = alloc_super();
5360         if (!super)
5361                 return 1;
5362
5363         if (!get_dev_sector_size(fd, NULL, &super->sector_size)) {
5364                 free_imsm(super);
5365                 return 1;
5366         }
5367         /* Load hba and capabilities if they exist.
5368          * But do not preclude loading metadata in case capabilities or hba are
5369          * non-compliant and ignore_hw_compat is set.
5370          */
5371         rv = find_intel_hba_capability(fd, super, devname);
5372         /* no orom/efi or non-intel hba of the disk */
5373         if (rv != 0 && st->ignore_hw_compat == 0) {
5374                 if (devname)
5375                         pr_err("No OROM/EFI properties for %s\n", devname);
5376                 free_imsm(super);
5377                 return 2;
5378         }
5379         rv = load_and_parse_mpb(fd, super, devname, 0);
5380
5381         /* retry the load if we might have raced against mdmon */
5382         if (rv == 3) {
5383                 struct mdstat_ent *mdstat = NULL;
5384                 char *name = fd2kname(fd);
5385
5386                 if (name)
5387                         mdstat = mdstat_by_component(name);
5388
5389                 if (mdstat && mdmon_running(mdstat->devnm) && getpid() != mdmon_pid(mdstat->devnm)) {
5390                         for (retry = 0; retry < 3; retry++) {
5391                                 sleep_for(0, MSEC_TO_NSEC(3), true);
5392                                 rv = load_and_parse_mpb(fd, super, devname, 0);
5393                                 if (rv != 3)
5394                                         break;
5395                         }
5396                 }
5397
5398                 free_mdstat(mdstat);
5399         }
5400
5401         if (rv) {
5402                 if (devname)
5403                         pr_err("Failed to load all information sections on %s\n", devname);
5404                 free_imsm(super);
5405                 return rv;
5406         }
5407
5408         st->sb = super;
5409         if (st->ss == NULL) {
5410                 st->ss = &super_imsm;
5411                 st->minor_version = 0;
5412                 st->max_devs = IMSM_MAX_DEVICES;
5413         }
5414
5415         /* load migration record */
5416         if (load_imsm_migr_rec(super) == 0) {
5417                 /* Check for unsupported migration features */
5418                 if (check_mpb_migr_compatibility(super) != 0) {
5419                         pr_err("Unsupported migration detected");
5420                         if (devname)
5421                                 fprintf(stderr, " on %s\n", devname);
5422                         else
5423                                 fprintf(stderr, " (IMSM).\n");
5424                         return 3;
5425                 }
5426         }
5427
5428         return 0;
5429 }
5430
5431 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
5432 {
5433         if (info->level == 1)
5434                 return 128;
5435         return info->chunk_size >> 9;
5436 }
5437
5438 static unsigned long long info_to_blocks_per_member(mdu_array_info_t *info,
5439                                                     unsigned long long size)
5440 {
5441         if (info->level == 1)
5442                 return size * 2;
5443         else
5444                 return (size * 2) & ~(info_to_blocks_per_strip(info) - 1);
5445 }
5446
5447 static void imsm_update_version_info(struct intel_super *super)
5448 {
5449         /* update the version and attributes */
5450         struct imsm_super *mpb = super->anchor;
5451         char *version;
5452         struct imsm_dev *dev;
5453         struct imsm_map *map;
5454         int i;
5455
5456         for (i = 0; i < mpb->num_raid_devs; i++) {
5457                 dev = get_imsm_dev(super, i);
5458                 map = get_imsm_map(dev, MAP_0);
5459                 if (__le32_to_cpu(dev->size_high) > 0)
5460                         mpb->attributes |= MPB_ATTRIB_2TB;
5461
5462                 /* FIXME detect when an array spans a port multiplier */
5463                 #if 0
5464                 mpb->attributes |= MPB_ATTRIB_PM;
5465                 #endif
5466
5467                 if (mpb->num_raid_devs > 1 ||
5468                     mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
5469                         version = MPB_VERSION_ATTRIBS;
5470                         switch (get_imsm_raid_level(map)) {
5471                         case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
5472                         case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
5473                         case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
5474                         case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
5475                         }
5476                 } else {
5477                         if (map->num_members >= 5)
5478                                 version = MPB_VERSION_5OR6_DISK_ARRAY;
5479                         else if (dev->status == DEV_CLONE_N_GO)
5480                                 version = MPB_VERSION_CNG;
5481                         else if (get_imsm_raid_level(map) == 5)
5482                                 version = MPB_VERSION_RAID5;
5483                         else if (map->num_members >= 3)
5484                                 version = MPB_VERSION_3OR4_DISK_ARRAY;
5485                         else if (get_imsm_raid_level(map) == 1)
5486                                 version = MPB_VERSION_RAID1;
5487                         else
5488                                 version = MPB_VERSION_RAID0;
5489                 }
5490                 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
5491         }
5492 }
5493
5494 /**
5495  * imsm_check_name() - check imsm naming criteria.
5496  * @super: &intel_super pointer, not NULL.
5497  * @name: name to check.
5498  * @verbose: verbose level.
5499  *
5500  * Name must be no longer than &MAX_RAID_SERIAL_LEN and must be unique across volumes.
5501  *
5502  * Returns: &true if @name matches, &false otherwise.
5503  */
5504 static bool imsm_is_name_allowed(struct intel_super *super, const char * const name,
5505                                  const int verbose)
5506 {
5507         struct imsm_super *mpb = super->anchor;
5508         int i;
5509
5510         if (is_string_lq(name, MAX_RAID_SERIAL_LEN + 1) == false) {
5511                 pr_vrb("imsm: Name \"%s\" is too long\n", name);
5512                 return false;
5513         }
5514
5515         for (i = 0; i < mpb->num_raid_devs; i++) {
5516                 struct imsm_dev *dev = get_imsm_dev(super, i);
5517
5518                 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
5519                         pr_vrb("imsm: Name \"%s\" already exists\n", name);
5520                         return false;
5521                 }
5522         }
5523
5524         return true;
5525 }
5526
5527 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
5528                                   struct shape *s, char *name,
5529                                   char *homehost, int *uuid,
5530                                   long long data_offset)
5531 {
5532         /* We are creating a volume inside a pre-existing container.
5533          * so st->sb is already set.
5534          */
5535         struct intel_super *super = st->sb;
5536         unsigned int sector_size = super->sector_size;
5537         struct imsm_super *mpb = super->anchor;
5538         struct intel_dev *dv;
5539         struct imsm_dev *dev;
5540         struct imsm_vol *vol;
5541         struct imsm_map *map;
5542         int idx = mpb->num_raid_devs;
5543         int i;
5544         int namelen;
5545         unsigned long long array_blocks;
5546         size_t size_old, size_new;
5547         unsigned int data_disks;
5548         unsigned long long size_per_member;
5549
5550         if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
5551                 pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa);
5552                 return 0;
5553         }
5554
5555         /* ensure the mpb is large enough for the new data */
5556         size_old = __le32_to_cpu(mpb->mpb_size);
5557         size_new = disks_to_mpb_size(info->nr_disks);
5558         if (size_new > size_old) {
5559                 void *mpb_new;
5560                 size_t size_round = ROUND_UP(size_new, sector_size);
5561
5562                 if (posix_memalign(&mpb_new, sector_size, size_round) != 0) {
5563                         pr_err("could not allocate new mpb\n");
5564                         return 0;
5565                 }
5566                 if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
5567                                    MIGR_REC_BUF_SECTORS*
5568                                    MAX_SECTOR_SIZE) != 0) {
5569                         pr_err("could not allocate migr_rec buffer\n");
5570                         free(super->buf);
5571                         free(super);
5572                         free(mpb_new);
5573                         return 0;
5574                 }
5575                 memcpy(mpb_new, mpb, size_old);
5576                 free(mpb);
5577                 mpb = mpb_new;
5578                 super->anchor = mpb_new;
5579                 mpb->mpb_size = __cpu_to_le32(size_new);
5580                 memset(mpb_new + size_old, 0, size_round - size_old);
5581                 super->len = size_round;
5582         }
5583         super->current_vol = idx;
5584
5585         /* handle 'failed_disks' by either:
5586          * a) create dummy disk entries in the table if this the first
5587          *    volume in the array.  We add them here as this is the only
5588          *    opportunity to add them. add_to_super_imsm_volume()
5589          *    handles the non-failed disks and continues incrementing
5590          *    mpb->num_disks.
5591          * b) validate that 'failed_disks' matches the current number
5592          *    of missing disks if the container is populated
5593          */
5594         if (super->current_vol == 0) {
5595                 mpb->num_disks = 0;
5596                 for (i = 0; i < info->failed_disks; i++) {
5597                         struct imsm_disk *disk;
5598
5599                         mpb->num_disks++;
5600                         disk = __get_imsm_disk(mpb, i);
5601                         disk->status = CONFIGURED_DISK | FAILED_DISK;
5602                         disk->scsi_id = __cpu_to_le32(~(__u32)0);
5603                         snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
5604                                  "missing:%d", (__u8)i);
5605                 }
5606                 find_missing(super);
5607         } else {
5608                 int missing = 0;
5609                 struct dl *d;
5610
5611                 for (d = super->missing; d; d = d->next)
5612                         missing++;
5613                 if (info->failed_disks > missing) {
5614                         pr_err("unable to add 'missing' disk to container\n");
5615                         return 0;
5616                 }
5617         }
5618
5619         if (imsm_is_name_allowed(super, name, 1) == false)
5620                 return 0;
5621
5622         dv = xmalloc(sizeof(*dv));
5623         dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
5624         /*
5625          * Explicitly allow truncating to not confuse gcc's
5626          * -Werror=stringop-truncation
5627          */
5628         namelen = min((int) strlen(name), MAX_RAID_SERIAL_LEN);
5629         memcpy(dev->volume, name, namelen);
5630         array_blocks = calc_array_size(info->level, info->raid_disks,
5631                                                info->layout, info->chunk_size,
5632                                                s->size * BLOCKS_PER_KB);
5633         data_disks = get_data_disks(info->level, info->layout,
5634                                     info->raid_disks);
5635         array_blocks = round_size_to_mb(array_blocks, data_disks);
5636         size_per_member = array_blocks / data_disks;
5637
5638         set_imsm_dev_size(dev, array_blocks);
5639         dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
5640         vol = &dev->vol;
5641         vol->migr_state = 0;
5642         set_migr_type(dev, MIGR_INIT);
5643         vol->dirty = !info->state;
5644         set_vol_curr_migr_unit(dev, 0);
5645         map = get_imsm_map(dev, MAP_0);
5646         set_pba_of_lba0(map, super->create_offset);
5647         map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
5648         map->failed_disk_num = ~0;
5649         if (info->level > 0)
5650                 map->map_state = (info->state ? IMSM_T_STATE_NORMAL
5651                                   : IMSM_T_STATE_UNINITIALIZED);
5652         else
5653                 map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED :
5654                                                       IMSM_T_STATE_NORMAL;
5655         map->ddf = 1;
5656
5657         if (info->level == 1 && info->raid_disks > 2) {
5658                 free(dev);
5659                 free(dv);
5660                 pr_err("imsm does not support more than 2 disksin a raid1 volume\n");
5661                 return 0;
5662         }
5663
5664         map->raid_level = info->level;
5665         if (info->level == 10)
5666                 map->raid_level = 1;
5667         set_num_domains(map);
5668
5669         size_per_member += NUM_BLOCKS_DIRTY_STRIPE_REGION;
5670         set_blocks_per_member(map, info_to_blocks_per_member(info,
5671                                                              size_per_member /
5672                                                              BLOCKS_PER_KB));
5673
5674         map->num_members = info->raid_disks;
5675         update_num_data_stripes(map, array_blocks);
5676         for (i = 0; i < map->num_members; i++) {
5677                 /* initialized in add_to_super */
5678                 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
5679         }
5680         mpb->num_raid_devs++;
5681         mpb->num_raid_devs_created++;
5682         dev->my_vol_raid_dev_num = mpb->num_raid_devs_created;
5683
5684         if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) {
5685                 dev->rwh_policy = RWH_MULTIPLE_OFF;
5686         } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
5687                 dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED;
5688         } else {
5689                 free(dev);
5690                 free(dv);
5691                 pr_err("imsm does not support consistency policy %s\n",
5692                        map_num_s(consistency_policies, s->consistency_policy));
5693                 return 0;
5694         }
5695
5696         dv->dev = dev;
5697         dv->index = super->current_vol;
5698         dv->next = super->devlist;
5699         super->devlist = dv;
5700
5701         imsm_update_version_info(super);
5702
5703         return 1;
5704 }
5705
5706 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
5707                            struct shape *s, char *name,
5708                            char *homehost, int *uuid,
5709                            unsigned long long data_offset)
5710 {
5711         /* This is primarily called by Create when creating a new array.
5712          * We will then get add_to_super called for each component, and then
5713          * write_init_super called to write it out to each device.
5714          * For IMSM, Create can create on fresh devices or on a pre-existing
5715          * array.
5716          * To create on a pre-existing array a different method will be called.
5717          * This one is just for fresh drives.
5718          */
5719         struct intel_super *super;
5720         struct imsm_super *mpb;
5721         size_t mpb_size;
5722         char *version;
5723
5724         if (data_offset != INVALID_SECTORS) {
5725                 pr_err("data-offset not supported by imsm\n");
5726                 return 0;
5727         }
5728
5729         if (st->sb)
5730                 return init_super_imsm_volume(st, info, s, name, homehost, uuid,
5731                                               data_offset);
5732
5733         if (info)
5734                 mpb_size = disks_to_mpb_size(info->nr_disks);
5735         else
5736                 mpb_size = MAX_SECTOR_SIZE;
5737
5738         super = alloc_super();
5739         if (super &&
5740             posix_memalign(&super->buf, MAX_SECTOR_SIZE, mpb_size) != 0) {
5741                 free_imsm(super);
5742                 super = NULL;
5743         }
5744         if (!super) {
5745                 pr_err("could not allocate superblock\n");
5746                 return 0;
5747         }
5748         if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
5749             MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
5750                 pr_err("could not allocate migr_rec buffer\n");
5751                 free(super->buf);
5752                 free_imsm(super);
5753                 return 0;
5754         }
5755         memset(super->buf, 0, mpb_size);
5756         mpb = super->buf;
5757         mpb->mpb_size = __cpu_to_le32(mpb_size);
5758         st->sb = super;
5759
5760         if (info == NULL) {
5761                 /* zeroing superblock */
5762                 return 0;
5763         }
5764
5765         mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
5766
5767         version = (char *) mpb->sig;
5768         strcpy(version, MPB_SIGNATURE);
5769         version += strlen(MPB_SIGNATURE);
5770         strcpy(version, MPB_VERSION_RAID0);
5771
5772         return 1;
5773 }
5774
5775 static int drive_validate_sector_size(struct intel_super *super, struct dl *dl)
5776 {
5777         unsigned int member_sector_size;
5778
5779         if (!is_fd_valid(dl->fd)) {
5780                 pr_err("Invalid file descriptor for %s\n", dl->devname);
5781                 return 0;
5782         }
5783
5784         if (!get_dev_sector_size(dl->fd, dl->devname, &member_sector_size))
5785                 return 0;
5786         if (member_sector_size != super->sector_size)
5787                 return 0;
5788         return 1;
5789 }
5790
5791 static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
5792                                      int fd, char *devname)
5793 {
5794         struct intel_super *super = st->sb;
5795         struct imsm_super *mpb = super->anchor;
5796         struct imsm_disk *_disk;
5797         struct imsm_dev *dev;
5798         struct imsm_map *map;
5799         struct dl *dl, *df;
5800         int slot;
5801         int autolayout = 0;
5802
5803         if (!is_fd_valid(fd))
5804                 autolayout = 1;
5805
5806         dev = get_imsm_dev(super, super->current_vol);
5807         map = get_imsm_map(dev, MAP_0);
5808
5809         if (! (dk->state & (1<<MD_DISK_SYNC))) {
5810                 pr_err("%s: Cannot add spare devices to IMSM volume\n",
5811                         devname);
5812                 return 1;
5813         }
5814
5815         for (dl = super->disks; dl ; dl = dl->next) {
5816                 if (autolayout) {
5817                         if (dl->raiddisk == dk->raid_disk)
5818                                 break;
5819                 } else if (dl->major == dk->major && dl->minor == dk->minor)
5820                         break;
5821         }
5822
5823         if (!dl) {
5824                 if (!autolayout)
5825                         pr_err("%s is not a member of the same container.\n",
5826                                devname);
5827                 return 1;
5828         }
5829
5830         if (!autolayout && super->current_vol > 0) {
5831                 int _slot = get_disk_slot_in_dev(super, 0, dl->index);
5832
5833                 if (_slot != dk->raid_disk) {
5834                         pr_err("Member %s is in %d slot for the first volume, but is in %d slot for a new volume.\n",
5835                                dl->devname, _slot, dk->raid_disk);
5836                         pr_err("Raid members are in different order than for the first volume, aborting.\n");
5837                         return 1;
5838                 }
5839         }
5840
5841         if (mpb->num_disks == 0)
5842                 if (!get_dev_sector_size(dl->fd, dl->devname,
5843                                          &super->sector_size))
5844                         return 1;
5845
5846         if (!drive_validate_sector_size(super, dl)) {
5847                 pr_err("Combining drives of different sector size in one volume is not allowed\n");
5848                 return 1;
5849         }
5850
5851         /* add a pristine spare to the metadata */
5852         if (dl->index < 0) {
5853                 dl->index = super->anchor->num_disks;
5854                 super->anchor->num_disks++;
5855         }
5856         /* Check the device has not already been added */
5857         slot = get_imsm_disk_slot(map, dl->index);
5858         if (slot >= 0 &&
5859             (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) {
5860                 pr_err("%s has been included in this array twice\n",
5861                         devname);
5862                 return 1;
5863         }
5864         set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index);
5865         dl->disk.status = CONFIGURED_DISK;
5866
5867         /* update size of 'missing' disks to be at least as large as the
5868          * largest acitve member (we only have dummy missing disks when
5869          * creating the first volume)
5870          */
5871         if (super->current_vol == 0) {
5872                 for (df = super->missing; df; df = df->next) {
5873                         if (total_blocks(&dl->disk) > total_blocks(&df->disk))
5874                                 set_total_blocks(&df->disk, total_blocks(&dl->disk));
5875                         _disk = __get_imsm_disk(mpb, df->index);
5876                         *_disk = df->disk;
5877                 }
5878         }
5879
5880         /* refresh unset/failed slots to point to valid 'missing' entries */
5881         for (df = super->missing; df; df = df->next)
5882                 for (slot = 0; slot < mpb->num_disks; slot++) {
5883                         __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
5884
5885                         if ((ord & IMSM_ORD_REBUILD) == 0)
5886                                 continue;
5887                         set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD);
5888                         if (is_gen_migration(dev)) {
5889                                 struct imsm_map *map2 = get_imsm_map(dev,
5890                                                                      MAP_1);
5891                                 int slot2 = get_imsm_disk_slot(map2, df->index);
5892                                 if (slot2 < map2->num_members && slot2 >= 0) {
5893                                         __u32 ord2 = get_imsm_ord_tbl_ent(dev,
5894                                                                          slot2,
5895                                                                          MAP_1);
5896                                         if ((unsigned)df->index ==
5897                                                                ord_to_idx(ord2))
5898                                                 set_imsm_ord_tbl_ent(map2,
5899                                                         slot2,
5900                                                         df->index |
5901                                                         IMSM_ORD_REBUILD);
5902                                 }
5903                         }
5904                         dprintf("set slot:%d to missing disk:%d\n", slot, df->index);
5905                         break;
5906                 }
5907
5908         /* if we are creating the first raid device update the family number */
5909         if (super->current_vol == 0) {
5910                 __u32 sum;
5911                 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
5912
5913                 _disk = __get_imsm_disk(mpb, dl->index);
5914                 if (!_disk) {
5915                         pr_err("BUG mpb setup error\n");
5916                         return 1;
5917                 }
5918                 *_dev = *dev;
5919                 *_disk = dl->disk;
5920                 sum = random32();
5921                 sum += __gen_imsm_checksum(mpb);
5922                 mpb->family_num = __cpu_to_le32(sum);
5923                 mpb->orig_family_num = mpb->family_num;
5924                 mpb->creation_time = __cpu_to_le64((__u64)time(NULL));
5925         }
5926         super->current_disk = dl;
5927         return 0;
5928 }
5929
5930 /* mark_spare()
5931  *   Function marks disk as spare and restores disk serial
5932  *   in case it was previously marked as failed by takeover operation
5933  * reruns:
5934  *   -1 : critical error
5935  *    0 : disk is marked as spare but serial is not set
5936  *    1 : success
5937  */
5938 int mark_spare(struct dl *disk)
5939 {
5940         __u8 serial[MAX_RAID_SERIAL_LEN];
5941         int ret_val = -1;
5942
5943         if (!disk)
5944                 return ret_val;
5945
5946         ret_val = 0;
5947         if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) {
5948                 /* Restore disk serial number, because takeover marks disk
5949                  * as failed and adds to serial ':0' before it becomes
5950                  * a spare disk.
5951                  */
5952                 serialcpy(disk->serial, serial);
5953                 serialcpy(disk->disk.serial, serial);
5954                 ret_val = 1;
5955         }
5956         disk->disk.status = SPARE_DISK;
5957         disk->index = -1;
5958
5959         return ret_val;
5960 }
5961
5962
5963 static int write_super_imsm_spare(struct intel_super *super, struct dl *d);
5964
5965 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
5966                              int fd, char *devname,
5967                              unsigned long long data_offset)
5968 {
5969         struct intel_super *super = st->sb;
5970         struct dl *dd;
5971         unsigned long long size;
5972         unsigned int member_sector_size;
5973         __u32 id;
5974         int rv;
5975         struct stat stb;
5976
5977         /* If we are on an RAID enabled platform check that the disk is
5978          * attached to the raid controller.
5979          * We do not need to test disks attachment for container based additions,
5980          * they shall be already tested when container was created/assembled.
5981          */
5982         rv = find_intel_hba_capability(fd, super, devname);
5983         /* no orom/efi or non-intel hba of the disk */
5984         if (rv != 0) {
5985                 dprintf("capability: %p fd: %d ret: %d\n",
5986                         super->orom, fd, rv);
5987                 return 1;
5988         }
5989
5990         if (super->current_vol >= 0)
5991                 return add_to_super_imsm_volume(st, dk, fd, devname);
5992
5993         fstat(fd, &stb);
5994         dd = xcalloc(sizeof(*dd), 1);
5995         dd->major = major(stb.st_rdev);
5996         dd->minor = minor(stb.st_rdev);
5997         dd->devname = devname ? xstrdup(devname) : NULL;
5998         dd->fd = fd;
5999         dd->e = NULL;
6000         dd->action = DISK_ADD;
6001         rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN);
6002         if (rv) {
6003                 pr_err("failed to retrieve scsi serial, aborting\n");
6004                 __free_imsm_disk(dd, 0);
6005                 abort();
6006         }
6007
6008         if (super->hba && ((super->hba->type == SYS_DEV_NVME) ||
6009            (super->hba->type == SYS_DEV_VMD))) {
6010                 int i;
6011                 char cntrl_path[PATH_MAX];
6012                 char *cntrl_name;
6013                 char pci_dev_path[PATH_MAX];
6014
6015                 if (!diskfd_to_devpath(fd, 2, pci_dev_path) ||
6016                     !diskfd_to_devpath(fd, 1, cntrl_path)) {
6017                         pr_err("failed to get dev paths, aborting\n");
6018                         __free_imsm_disk(dd, 0);
6019                         return 1;
6020                 }
6021
6022                 cntrl_name = basename(cntrl_path);
6023                 if (is_multipath_nvme(fd))
6024                         pr_err("%s controller supports Multi-Path I/O, Intel (R) VROC does not support multipathing\n",
6025                                cntrl_name);
6026
6027                 if (devpath_to_vendor(pci_dev_path) == 0x8086) {
6028                         /*
6029                          * If Intel's NVMe drive has serial ended with
6030                          * "-A","-B","-1" or "-2" it means that this is "x8"
6031                          * device (double drive on single PCIe card).
6032                          * User should be warned about potential data loss.
6033                          */
6034                         for (i = MAX_RAID_SERIAL_LEN-1; i > 0; i--) {
6035                                 /* Skip empty character at the end */
6036                                 if (dd->serial[i] == 0)
6037                                         continue;
6038
6039                                 if (((dd->serial[i] == 'A') ||
6040                                    (dd->serial[i] == 'B') ||
6041                                    (dd->serial[i] == '1') ||
6042                                    (dd->serial[i] == '2')) &&
6043                                    (dd->serial[i-1] == '-'))
6044                                         pr_err("\tThe action you are about to take may put your data at risk.\n"
6045                                                 "\tPlease note that x8 devices may consist of two separate x4 devices "
6046                                                 "located on a single PCIe port.\n"
6047                                                 "\tRAID 0 is the only supported configuration for this type of x8 device.\n");
6048                                 break;
6049                         }
6050                 } else if (super->hba->type == SYS_DEV_VMD && super->orom &&
6051                     !imsm_orom_has_tpv_support(super->orom)) {
6052                         pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
6053                                "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
6054                         __free_imsm_disk(dd, 0);
6055                         return 1;
6056                 }
6057         }
6058
6059         get_dev_size(fd, NULL, &size);
6060         if (!get_dev_sector_size(fd, NULL, &member_sector_size)) {
6061                 __free_imsm_disk(dd, 0);
6062                 return 1;
6063         }
6064
6065         if (super->sector_size == 0) {
6066                 /* this a first device, so sector_size is not set yet */
6067                 super->sector_size = member_sector_size;
6068         }
6069
6070         /* clear migr_rec when adding disk to container */
6071         memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
6072         if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*member_sector_size,
6073             SEEK_SET) >= 0) {
6074                 if ((unsigned int)write(fd, super->migr_rec_buf,
6075                     MIGR_REC_BUF_SECTORS*member_sector_size) !=
6076                     MIGR_REC_BUF_SECTORS*member_sector_size)
6077                         perror("Write migr_rec failed");
6078         }
6079
6080         size /= 512;
6081         serialcpy(dd->disk.serial, dd->serial);
6082         set_total_blocks(&dd->disk, size);
6083         if (__le32_to_cpu(dd->disk.total_blocks_hi) > 0) {
6084                 struct imsm_super *mpb = super->anchor;
6085                 mpb->attributes |= MPB_ATTRIB_2TB_DISK;
6086         }
6087         mark_spare(dd);
6088         if (sysfs_disk_to_scsi_id(fd, &id) == 0)
6089                 dd->disk.scsi_id = __cpu_to_le32(id);
6090         else
6091                 dd->disk.scsi_id = __cpu_to_le32(0);
6092
6093         if (st->update_tail) {
6094                 dd->next = super->disk_mgmt_list;
6095                 super->disk_mgmt_list = dd;
6096         } else {
6097                 /* this is called outside of mdmon
6098                  * write initial spare metadata
6099                  * mdmon will overwrite it.
6100                  */
6101                 dd->next = super->disks;
6102                 super->disks = dd;
6103                 write_super_imsm_spare(super, dd);
6104         }
6105
6106         return 0;
6107 }
6108
6109 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
6110 {
6111         struct intel_super *super = st->sb;
6112         struct dl *dd;
6113
6114         /* remove from super works only in mdmon - for communication
6115          * manager - monitor. Check if communication memory buffer
6116          * is prepared.
6117          */
6118         if (!st->update_tail) {
6119                 pr_err("shall be used in mdmon context only\n");
6120                 return 1;
6121         }
6122         dd = xcalloc(1, sizeof(*dd));
6123         dd->major = dk->major;
6124         dd->minor = dk->minor;
6125         dd->fd = -1;
6126         mark_spare(dd);
6127         dd->action = DISK_REMOVE;
6128
6129         dd->next = super->disk_mgmt_list;
6130         super->disk_mgmt_list = dd;
6131
6132         return 0;
6133 }
6134
6135 static int store_imsm_mpb(int fd, struct imsm_super *mpb);
6136
6137 static union {
6138         char buf[MAX_SECTOR_SIZE];
6139         struct imsm_super anchor;
6140 } spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
6141
6142
6143 static int write_super_imsm_spare(struct intel_super *super, struct dl *d)
6144 {
6145         struct imsm_super *mpb = super->anchor;
6146         struct imsm_super *spare = &spare_record.anchor;
6147         __u32 sum;
6148
6149         if (d->index != -1)
6150                 return 1;
6151
6152         spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
6153         spare->generation_num = __cpu_to_le32(1UL);
6154         spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
6155         spare->num_disks = 1;
6156         spare->num_raid_devs = 0;
6157         spare->cache_size = mpb->cache_size;
6158         spare->pwr_cycle_count = __cpu_to_le32(1);
6159
6160         snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
6161                  MPB_SIGNATURE MPB_VERSION_RAID0);
6162
6163         spare->disk[0] = d->disk;
6164         if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
6165                 spare->attributes |= MPB_ATTRIB_2TB_DISK;
6166
6167         if (super->sector_size == 4096)
6168                 convert_to_4k_imsm_disk(&spare->disk[0]);
6169
6170         sum = __gen_imsm_checksum(spare);
6171         spare->family_num = __cpu_to_le32(sum);
6172         spare->orig_family_num = 0;
6173         sum = __gen_imsm_checksum(spare);
6174         spare->check_sum = __cpu_to_le32(sum);
6175
6176         if (store_imsm_mpb(d->fd, spare)) {
6177                 pr_err("failed for device %d:%d %s\n",
6178                         d->major, d->minor, strerror(errno));
6179                 return 1;
6180         }
6181
6182         return 0;
6183 }
6184 /* spare records have their own family number and do not have any defined raid
6185  * devices
6186  */
6187 static int write_super_imsm_spares(struct intel_super *super, int doclose)
6188 {
6189         struct dl *d;
6190
6191         for (d = super->disks; d; d = d->next) {
6192                 if (d->index != -1)
6193                         continue;
6194
6195                 if (write_super_imsm_spare(super, d))
6196                         return 1;
6197
6198                 if (doclose)
6199                         close_fd(&d->fd);
6200         }
6201
6202         return 0;
6203 }
6204
6205 static int write_super_imsm(struct supertype *st, int doclose)
6206 {
6207         struct intel_super *super = st->sb;
6208         unsigned int sector_size = super->sector_size;
6209         struct imsm_super *mpb = super->anchor;
6210         struct dl *d;
6211         __u32 generation;
6212         __u32 sum;
6213         int spares = 0;
6214         int i;
6215         __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
6216         int num_disks = 0;
6217         int clear_migration_record = 1;
6218         __u32 bbm_log_size;
6219
6220         /* 'generation' is incremented everytime the metadata is written */
6221         generation = __le32_to_cpu(mpb->generation_num);
6222         generation++;
6223         mpb->generation_num = __cpu_to_le32(generation);
6224
6225         /* fix up cases where previous mdadm releases failed to set
6226          * orig_family_num
6227          */
6228         if (mpb->orig_family_num == 0)
6229                 mpb->orig_family_num = mpb->family_num;
6230
6231         for (d = super->disks; d; d = d->next) {
6232                 if (d->index == -1)
6233                         spares++;
6234                 else {
6235                         mpb->disk[d->index] = d->disk;
6236                         num_disks++;
6237                 }
6238         }
6239         for (d = super->missing; d; d = d->next) {
6240                 mpb->disk[d->index] = d->disk;
6241                 num_disks++;
6242         }
6243         mpb->num_disks = num_disks;
6244         mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
6245
6246         for (i = 0; i < mpb->num_raid_devs; i++) {
6247                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
6248                 struct imsm_dev *dev2 = get_imsm_dev(super, i);
6249
6250                 imsm_copy_dev(dev, dev2);
6251                 mpb_size += sizeof_imsm_dev(dev, 0);
6252
6253                 if (is_gen_migration(dev2))
6254                         clear_migration_record = 0;
6255         }
6256
6257         bbm_log_size = get_imsm_bbm_log_size(super->bbm_log);
6258
6259         if (bbm_log_size) {
6260                 memcpy((void *)mpb + mpb_size, super->bbm_log, bbm_log_size);
6261                 mpb->attributes |= MPB_ATTRIB_BBM;
6262         } else
6263                 mpb->attributes &= ~MPB_ATTRIB_BBM;
6264
6265         super->anchor->bbm_log_size = __cpu_to_le32(bbm_log_size);
6266         mpb_size += bbm_log_size;
6267         mpb->mpb_size = __cpu_to_le32(mpb_size);
6268
6269 #ifdef DEBUG
6270         assert(super->len == 0 || mpb_size <= super->len);
6271 #endif
6272
6273         /* recalculate checksum */
6274         sum = __gen_imsm_checksum(mpb);
6275         mpb->check_sum = __cpu_to_le32(sum);
6276
6277         if (super->clean_migration_record_by_mdmon) {
6278                 clear_migration_record = 1;
6279                 super->clean_migration_record_by_mdmon = 0;
6280         }
6281         if (clear_migration_record)
6282                 memset(super->migr_rec_buf, 0,
6283                     MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
6284
6285         if (sector_size == 4096)
6286                 convert_to_4k(super);
6287
6288         /* write the mpb for disks that compose raid devices */
6289         for (d = super->disks; d ; d = d->next) {
6290                 if (d->index < 0 || is_failed(&d->disk))
6291                         continue;
6292
6293                 if (clear_migration_record) {
6294                         unsigned long long dsize;
6295
6296                         get_dev_size(d->fd, NULL, &dsize);
6297                         if (lseek64(d->fd, dsize - sector_size,
6298                             SEEK_SET) >= 0) {
6299                                 if ((unsigned int)write(d->fd,
6300                                     super->migr_rec_buf,
6301                                     MIGR_REC_BUF_SECTORS*sector_size) !=
6302                                     MIGR_REC_BUF_SECTORS*sector_size)
6303                                         perror("Write migr_rec failed");
6304                         }
6305                 }
6306
6307                 if (store_imsm_mpb(d->fd, mpb))
6308                         fprintf(stderr,
6309                                 "failed for device %d:%d (fd: %d)%s\n",
6310                                 d->major, d->minor,
6311                                 d->fd, strerror(errno));
6312
6313                 if (doclose)
6314                         close_fd(&d->fd);
6315         }
6316
6317         if (spares)
6318                 return write_super_imsm_spares(super, doclose);
6319
6320         return 0;
6321 }
6322
6323 static int create_array(struct supertype *st, int dev_idx)
6324 {
6325         size_t len;
6326         struct imsm_update_create_array *u;
6327         struct intel_super *super = st->sb;
6328         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
6329         struct imsm_map *map = get_imsm_map(dev, MAP_0);
6330         struct disk_info *inf;
6331         struct imsm_disk *disk;
6332         int i;
6333
6334         len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
6335               sizeof(*inf) * map->num_members;
6336         u = xmalloc(len);
6337         u->type = update_create_array;
6338         u->dev_idx = dev_idx;
6339         imsm_copy_dev(&u->dev, dev);
6340         inf = get_disk_info(u);
6341         for (i = 0; i < map->num_members; i++) {
6342                 int idx = get_imsm_disk_idx(dev, i, MAP_X);
6343
6344                 disk = get_imsm_disk(super, idx);
6345                 if (!disk)
6346                         disk = get_imsm_missing(super, idx);
6347                 serialcpy(inf[i].serial, disk->serial);
6348         }
6349         append_metadata_update(st, u, len);
6350
6351         return 0;
6352 }
6353
6354 static int mgmt_disk(struct supertype *st)
6355 {
6356         struct intel_super *super = st->sb;
6357         size_t len;
6358         struct imsm_update_add_remove_disk *u;
6359
6360         if (!super->disk_mgmt_list)
6361                 return 0;
6362
6363         len = sizeof(*u);
6364         u = xmalloc(len);
6365         u->type = update_add_remove_disk;
6366         append_metadata_update(st, u, len);
6367
6368         return 0;
6369 }
6370
6371 __u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
6372
6373 static int write_ppl_header(unsigned long long ppl_sector, int fd, void *buf)
6374 {
6375         struct ppl_header *ppl_hdr = buf;
6376         int ret;
6377
6378         ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
6379
6380         if (lseek64(fd, ppl_sector * 512, SEEK_SET) < 0) {
6381                 ret = -errno;
6382                 perror("Failed to seek to PPL header location");
6383                 return ret;
6384         }
6385
6386         if (write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
6387                 ret = -errno;
6388                 perror("Write PPL header failed");
6389                 return ret;
6390         }
6391
6392         fsync(fd);
6393
6394         return 0;
6395 }
6396
6397 static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
6398 {
6399         struct intel_super *super = st->sb;
6400         void *buf;
6401         struct ppl_header *ppl_hdr;
6402         int ret;
6403
6404         /* first clear entire ppl space */
6405         ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
6406         if (ret)
6407                 return ret;
6408
6409         ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
6410         if (ret) {
6411                 pr_err("Failed to allocate PPL header buffer\n");
6412                 return -ret;
6413         }
6414
6415         memset(buf, 0, PPL_HEADER_SIZE);
6416         ppl_hdr = buf;
6417         memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
6418         ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
6419
6420         if (info->mismatch_cnt) {
6421                 /*
6422                  * We are overwriting an invalid ppl. Make one entry with wrong
6423                  * checksum to prevent the kernel from skipping resync.
6424                  */
6425                 ppl_hdr->entries_count = __cpu_to_le32(1);
6426                 ppl_hdr->entries[0].checksum = ~0;
6427         }
6428
6429         ret = write_ppl_header(info->ppl_sector, fd, buf);
6430
6431         free(buf);
6432         return ret;
6433 }
6434
/* Forward declaration; the definition lies outside this part of the file. */
static int is_rebuilding(struct imsm_dev *dev);

/*
 * Validate the PPL header(s) of one member disk and repair them if needed.
 *
 * @st:   supertype whose sb holds the IMSM metadata.
 * @info: information about the volume (container_member, ppl_sector,
 *        ppl_size, consistency_policy, resync_start, mismatch_cnt).
 * @disk: the member to check; disk->disk.raid_disk selects the slot.
 *
 * The PPL area is scanned header by header.  A header is accepted when its
 * CRC matches, its generation is not older than the previous header's and
 * its signature equals the anchor's orig_family_num.
 *
 * Returns:
 *   0  - nothing to check (no slot, disk absent or failed), or a valid
 *        header was found (and, for the rebuild case, rewritten
 *        successfully);
 *   1  - the PPL is invalid and was not re-initialized; info->mismatch_cnt
 *        has been incremented;
 *  -1  - buffer allocation, seek or read failed;
 *  otherwise the result of write_init_ppl() / write_ppl_header().
 */
static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
			     struct mdinfo *disk)
{
	struct intel_super *super = st->sb;
	struct dl *d;
	void *buf_orig, *buf, *buf_prev = NULL;
	int ret = 0;
	struct ppl_header *ppl_hdr = NULL;
	__u32 crc;
	struct imsm_dev *dev;
	__u32 idx;
	unsigned int i;
	unsigned long long ppl_offset = 0;
	unsigned long long prev_gen_num = 0;

	/* not an active member of the array */
	if (disk->disk.raid_disk < 0)
		return 0;

	dev = get_imsm_dev(super, info->container_member);
	idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0);
	d = get_imsm_dl_disk(super, idx);

	if (!d || d->index < 0 || is_failed(&d->disk))
		return 0;

	/*
	 * Room for two headers: the two halves are used alternately so that
	 * the last accepted header stays available while the next one is read.
	 */
	if (posix_memalign(&buf_orig, MAX_SECTOR_SIZE, PPL_HEADER_SIZE * 2)) {
		pr_err("Failed to allocate PPL header buffer\n");
		return -1;
	}
	buf = buf_orig;

	/* assume the PPL is invalid until a header passes all checks */
	ret = 1;
	while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
		void *tmp;

		dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);

		if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
			    SEEK_SET) < 0) {
			perror("Failed to seek to PPL header location");
			ret = -1;
			break;
		}

		if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
			perror("Read PPL header failed");
			ret = -1;
			break;
		}

		ppl_hdr = buf;

		/* the CRC is computed with the checksum field zeroed */
		crc = __le32_to_cpu(ppl_hdr->checksum);
		ppl_hdr->checksum = 0;

		if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
			dprintf("Wrong PPL header checksum on %s\n",
				d->devname);
			break;
		}

		if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
			/* previous was newest, it was already checked */
			break;
		}

		if ((__le32_to_cpu(ppl_hdr->signature) !=
			      super->anchor->orig_family_num)) {
			dprintf("Wrong PPL header signature on %s\n",
				d->devname);
			ret = 1;
			break;
		}

		ret = 0;
		prev_gen_num = __le64_to_cpu(ppl_hdr->generation);

		/* the next header follows this header and all of its entries */
		ppl_offset += PPL_HEADER_SIZE;
		for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
			ppl_offset +=
				   __le32_to_cpu(ppl_hdr->entries[i].pp_size);

		/* keep the accepted header in buf_prev, read into the other half */
		if (!buf_prev)
			buf_prev = buf + PPL_HEADER_SIZE;
		tmp = buf_prev;
		buf_prev = buf;
		buf = tmp;
	}

	/* continue with the last accepted header, if there is one */
	if (buf_prev) {
		buf = buf_prev;
		ppl_hdr = buf_prev;
	}

	/*
	 * Update metadata to use multiple PPLs area (1MB).
	 * This is done once for all RAID members
	 */
	if (info->consistency_policy == CONSISTENCY_POLICY_PPL &&
	    info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) {
		char subarray[20];
		struct mdinfo *member_dev;

		sprintf(subarray, "%d", info->container_member);

		/* with mdmon running the change is queued as a metadata update */
		if (mdmon_running(st->container_devnm))
			st->update_tail = &st->updates;

		if (st->ss->update_subarray(st, subarray, UOPT_PPL, NULL)) {
			pr_err("Failed to update subarray %s\n",
			      subarray);
		} else {
			if (st->update_tail)
				flush_metadata_updates(st);
			else
				st->ss->sync_metadata(st);
			info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
			for (member_dev = info->devs; member_dev;
			     member_dev = member_dev->next)
				member_dev->ppl_size =
				    (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
		}
	}

	if (ret == 1) {
		struct imsm_map *map = get_imsm_map(dev, MAP_X);

		/*
		 * Invalid PPL: re-initialize it when the volume is
		 * uninitialized, normal and not dirty, or when a rebuild has
		 * not progressed yet and this slot maps to a different disk
		 * in MAP_1.  Otherwise only count the mismatch.
		 */
		if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
		   (map->map_state == IMSM_T_STATE_NORMAL &&
		   !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
		   (is_rebuilding(dev) &&
		    vol_curr_migr_unit(dev) == 0 &&
		    get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
			ret = st->ss->write_init_ppl(st, info, d->fd);
		else
			info->mismatch_cnt++;
	} else if (ret == 0 &&
		   ppl_hdr->entries_count == 0 &&
		   is_rebuilding(dev) &&
		   info->resync_start == 0) {
		/*
		 * The header has no entries - add a single empty entry and
		 * rewrite the header to prevent the kernel from going into
		 * resync after an interrupted rebuild.
		 */
		ppl_hdr->entries_count = __cpu_to_le32(1);
		ret = write_ppl_header(info->ppl_sector, d->fd, buf);
	}

	free(buf_orig);

	return ret;
}
6590
6591 static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
6592 {
6593         struct intel_super *super = st->sb;
6594         struct dl *d;
6595         int ret = 0;
6596
6597         if (info->consistency_policy != CONSISTENCY_POLICY_PPL ||
6598             info->array.level != 5)
6599                 return 0;
6600
6601         for (d = super->disks; d ; d = d->next) {
6602                 if (d->index < 0 || is_failed(&d->disk))
6603                         continue;
6604
6605                 ret = st->ss->write_init_ppl(st, info, d->fd);
6606                 if (ret)
6607                         break;
6608         }
6609
6610         return ret;
6611 }
6612
6613 /*******************************************************************************
6614  * Function:    write_init_bitmap_imsm_vol
6615  * Description: Write a bitmap header and prepares the area for the bitmap.
6616  * Parameters:
6617  *      st      : supertype information
6618  *      vol_idx : the volume index to use
6619  *
6620  * Returns:
6621  *       0 : success
6622  *      -1 : fail
6623  ******************************************************************************/
6624 static int write_init_bitmap_imsm_vol(struct supertype *st, int vol_idx)
6625 {
6626         struct intel_super *super = st->sb;
6627         int prev_current_vol = super->current_vol;
6628         struct dl *d;
6629         int ret = 0;
6630
6631         super->current_vol = vol_idx;
6632         for (d = super->disks; d; d = d->next) {
6633                 if (d->index < 0 || is_failed(&d->disk))
6634                         continue;
6635                 ret = st->ss->write_bitmap(st, d->fd, NoUpdate);
6636                 if (ret)
6637                         break;
6638         }
6639         super->current_vol = prev_current_vol;
6640         return ret;
6641 }
6642
6643 /*******************************************************************************
6644  * Function:    write_init_bitmap_imsm_all
6645  * Description: Write a bitmap header and prepares the area for the bitmap.
6646  *              Operation is executed for volumes with CONSISTENCY_POLICY_BITMAP.
6647  * Parameters:
6648  *      st      : supertype information
6649  *      info    : info about the volume where the bitmap should be written
6650  *      vol_idx : the volume index to use
6651  *
6652  * Returns:
6653  *       0 : success
6654  *      -1 : fail
6655  ******************************************************************************/
6656 static int write_init_bitmap_imsm_all(struct supertype *st, struct mdinfo *info,
6657                                       int vol_idx)
6658 {
6659         int ret = 0;
6660
6661         if (info && (info->consistency_policy == CONSISTENCY_POLICY_BITMAP))
6662                 ret = write_init_bitmap_imsm_vol(st, vol_idx);
6663
6664         return ret;
6665 }
6666
6667 static int write_init_super_imsm(struct supertype *st)
6668 {
6669         struct intel_super *super = st->sb;
6670         int current_vol = super->current_vol;
6671         int rv = 0;
6672         struct mdinfo info;
6673
6674         getinfo_super_imsm(st, &info, NULL);
6675
6676         /* we are done with current_vol reset it to point st at the container */
6677         super->current_vol = -1;
6678
6679         if (st->update_tail) {
6680                 /* queue the recently created array / added disk
6681                  * as a metadata update */
6682
6683                 /* determine if we are creating a volume or adding a disk */
6684                 if (current_vol < 0) {
6685                         /* in the mgmt (add/remove) disk case we are running
6686                          * in mdmon context, so don't close fd's
6687                          */
6688                         rv = mgmt_disk(st);
6689                 } else {
6690                         /* adding the second volume to the array */
6691                         rv = write_init_ppl_imsm_all(st, &info);
6692                         if (!rv)
6693                                 rv = write_init_bitmap_imsm_all(st, &info, current_vol);
6694                         if (!rv)
6695                                 rv = create_array(st, current_vol);
6696                 }
6697         } else {
6698                 struct dl *d;
6699                 for (d = super->disks; d; d = d->next)
6700                         Kill(d->devname, NULL, 0, -1, 1);
6701                 if (current_vol >= 0) {
6702                         rv = write_init_ppl_imsm_all(st, &info);
6703                         if (!rv)
6704                                 rv = write_init_bitmap_imsm_all(st, &info, current_vol);
6705                 }
6706
6707                 if (!rv)
6708                         rv = write_super_imsm(st, 1);
6709         }
6710
6711         return rv;
6712 }
6713
6714 static int store_super_imsm(struct supertype *st, int fd)
6715 {
6716         struct intel_super *super = st->sb;
6717         struct imsm_super *mpb = super ? super->anchor : NULL;
6718
6719         if (!mpb)
6720                 return 1;
6721
6722         if (super->sector_size == 4096)
6723                 convert_to_4k(super);
6724         return store_imsm_mpb(fd, mpb);
6725 }
6726
/*
 * Check whether @dev can be used as a member of an IMSM container.
 *
 * @st:          supertype, passed on to avail_size_imsm().
 * @level:       requested level; anything but a container level is rejected.
 * @raiddisks:   number of disks requested, checked against the platform
 *               limit (orom->tds) when platform capabilities are found.
 * @data_offset: passed on to avail_size_imsm().
 * @dev:         device path; NULL means there is nothing to check.
 * @freesize:    when not NULL, receives the usable size as computed by
 *               avail_size_imsm().
 * @verbose:     when set, rejection reasons are printed.
 *
 * Returns 1 when @dev is NULL or the device is acceptable.  Returns 0 when
 * @level is not a container level, the device cannot be opened or sized,
 * no HBA capabilities are found for it, or a platform limit is exceeded.
 */
static int validate_geometry_imsm_container(struct supertype *st, int level,
					    int raiddisks,
					    unsigned long long data_offset,
					    char *dev,
					    unsigned long long *freesize,
					    int verbose)
{
	int fd;
	unsigned long long ldsize;
	struct intel_super *super = NULL;
	int rv = 0;

	if (!is_container(level))
		return 0;
	if (!dev)
		return 1;

	fd = dev_open(dev, O_RDONLY|O_EXCL);
	if (!is_fd_valid(fd)) {
		pr_vrb("imsm: Cannot open %s: %s\n", dev, strerror(errno));
		return 0;
	}
	if (!get_dev_size(fd, dev, &ldsize))
		goto exit;

	/* capabilities retrieve could be possible
	 * note that there is no fd for the disks in array.
	 */
	super = alloc_super();
	if (!super)
		goto exit;

	if (!get_dev_sector_size(fd, NULL, &super->sector_size))
		goto exit;

	rv = find_intel_hba_capability(fd, super, verbose > 0 ? dev : NULL);
	if (rv != 0) {
#if DEBUG
		char str[256];
		fd2devname(fd, str);
		dprintf("fd: %d %s orom: %p rv: %d raiddisk: %d\n",
			fd, str, super->orom, rv, raiddisks);
#endif
		/* no orom/efi or non-intel hba of the disk */
		rv = 0;
		goto exit;
	}
	/* platform limits apply only when capabilities were found */
	if (super->orom) {
		if (raiddisks > super->orom->tds) {
			if (verbose)
				pr_err("%d exceeds maximum number of platform supported disks: %d\n",
					raiddisks, super->orom->tds);
			goto exit;
		}
		/* sector count does not fit in 32 bits and 2TB disks are unsupported */
		if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 &&
		    (ldsize >> 9) >> 32 > 0) {
			if (verbose)
				pr_err("%s exceeds maximum platform supported size\n", dev);
			goto exit;
		}

		if (super->hba->type == SYS_DEV_VMD ||
		    super->hba->type == SYS_DEV_NVME) {
			if (!imsm_is_nvme_namespace_supported(fd, 1)) {
				if (verbose)
					pr_err("NVMe namespace %s is not supported by IMSM\n",
						basename(dev));
				goto exit;
			}
		}
	}
	if (freesize)
		*freesize = avail_size_imsm(st, ldsize >> 9, data_offset);
	rv = 1;
exit:
	/* common cleanup: rv is still 0 on every failure path above */
	if (super)
		free_imsm(super);
	close(fd);

	return rv;
}
6808
6809 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
6810 {
6811         const unsigned long long base_start = e[*idx].start;
6812         unsigned long long end = base_start + e[*idx].size;
6813         int i;
6814
6815         if (base_start == end)
6816                 return 0;
6817
6818         *idx = *idx + 1;
6819         for (i = *idx; i < num_extents; i++) {
6820                 /* extend overlapping extents */
6821                 if (e[i].start >= base_start &&
6822                     e[i].start <= end) {
6823                         if (e[i].size == 0)
6824                                 return 0;
6825                         if (e[i].start + e[i].size > end)
6826                                 end = e[i].start + e[i].size;
6827                 } else if (e[i].start > end) {
6828                         *idx = i;
6829                         break;
6830                 }
6831         }
6832
6833         return end - base_start;
6834 }
6835
6836 /** merge_extents() - analyze extents and get free size.
6837  * @super: Intel metadata, not NULL.
6838  * @expanding: if set, we are expanding &super->current_vol.
6839  *
6840  * Build a composite disk with all known extents and generate a size given the
6841  * "all disks in an array must share a common start offset" constraint.
6842  * If a volume is expanded, then return free space after the volume.
6843  *
6844  * Return: Free space or 0 on failure.
6845  */
6846 static unsigned long long merge_extents(struct intel_super *super, const bool expanding)
6847 {
6848         struct extent *e;
6849         struct dl *dl;
6850         int i, j, pos_vol_idx = -1;
6851         int extent_idx = 0;
6852         int sum_extents = 0;
6853         unsigned long long pos = 0;
6854         unsigned long long start = 0;
6855         unsigned long long free_size = 0;
6856
6857         unsigned long pre_reservation = 0;
6858         unsigned long post_reservation = IMSM_RESERVED_SECTORS;
6859         unsigned long reservation_size;
6860
6861         for (dl = super->disks; dl; dl = dl->next)
6862                 if (dl->e)
6863                         sum_extents += dl->extent_cnt;
6864         e = xcalloc(sum_extents, sizeof(struct extent));
6865
6866         /* coalesce and sort all extents. also, check to see if we need to
6867          * reserve space between member arrays
6868          */
6869         j = 0;
6870         for (dl = super->disks; dl; dl = dl->next) {
6871                 if (!dl->e)
6872                         continue;
6873                 for (i = 0; i < dl->extent_cnt; i++)
6874                         e[j++] = dl->e[i];
6875         }
6876         qsort(e, sum_extents, sizeof(*e), cmp_extent);
6877
6878         /* merge extents */
6879         i = 0;
6880         j = 0;
6881         while (i < sum_extents) {
6882                 e[j].start = e[i].start;
6883                 e[j].vol = e[i].vol;
6884                 e[j].size = find_size(e, &i, sum_extents);
6885                 j++;
6886                 if (e[j-1].size == 0)
6887                         break;
6888         }
6889
6890         i = 0;
6891         do {
6892                 unsigned long long esize = e[i].start - pos;
6893
6894                 if (expanding ? pos_vol_idx == super->current_vol : esize >= free_size) {
6895                         free_size = esize;
6896                         start = pos;
6897                         extent_idx = i;
6898                 }
6899
6900                 pos = e[i].start + e[i].size;
6901                 pos_vol_idx = e[i].vol;
6902
6903                 i++;
6904         } while (e[i-1].size);
6905
6906         if (free_size == 0) {
6907                 dprintf("imsm: Cannot find free size.\n");
6908                 free(e);
6909                 return 0;
6910         }
6911
6912         if (!expanding && extent_idx != 0)
6913                 /*
6914                  * Not a real first volume in a container is created, pre_reservation is needed.
6915                  */
6916                 pre_reservation = IMSM_RESERVED_SECTORS;
6917
6918         if (e[extent_idx].size == 0)
6919                 /*
6920                  * extent_idx points to the metadata, post_reservation is allready done.
6921                  */
6922                 post_reservation = 0;
6923         free(e);
6924
6925         reservation_size = pre_reservation + post_reservation;
6926
6927         if (free_size < reservation_size) {
6928                 dprintf("imsm: Reservation size is greater than free space.\n");
6929                 return 0;
6930         }
6931
6932         super->create_offset = start + pre_reservation;
6933         return free_size - reservation_size;
6934 }
6935
/*
 * Tell whether a volume of @level with @raiddisks members may be created.
 *
 * Negative levels and levels 4 and 6 are never supported.  Without platform
 * capabilities (@orom is NULL) every other level is accepted.  With them,
 * the level must be advertised by the platform and the disk count must fit:
 * RAID1 needs exactly 2 disks (more than 2 requires RAID1E support), RAID10
 * exactly 4, RAID5 more than 2.
 *
 * Returns non-zero when supported, 0 otherwise.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level < 0 || level == 4 || level == 6)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	/* if we have an orom prevent invalid raid levels */
	if (level == 0)
		return imsm_orom_has_raid0(orom);

	if (level == 1) {
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	}

	if (level == 10)
		return imsm_orom_has_raid10(orom) && raiddisks == 4;

	if (level == 5)
		return imsm_orom_has_raid5(orom) && raiddisks > 2;

	return 0;
}
6957
6958 static int
6959 active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
6960                         int dpa, int verbose)
6961 {
6962         struct mdstat_ent *mdstat = mdstat_read(0, 0);
6963         struct mdstat_ent *memb;
6964         int count = 0;
6965         int num = 0;
6966         struct md_list *dv;
6967         int found;
6968
6969         for (memb = mdstat ; memb ; memb = memb->next) {
6970                 if (memb->metadata_version &&
6971                     (strncmp(memb->metadata_version, "external:", 9) == 0) &&
6972                     (strcmp(&memb->metadata_version[9], name) == 0) &&
6973                     !is_subarray(memb->metadata_version+9) &&
6974                     memb->members) {
6975                         struct dev_member *dev = memb->members;
6976                         int fd = -1;
6977                         while (dev && !is_fd_valid(fd)) {
6978                                 char *path = xmalloc(strlen(dev->name) + strlen("/dev/") + 1);
6979                                 num = snprintf(path, PATH_MAX, "%s%s", "/dev/", dev->name);
6980                                 if (num > 0)
6981                                         fd = open(path, O_RDONLY, 0);
6982                                 if (num <= 0 || !is_fd_valid(fd)) {
6983                                         pr_vrb("Cannot open %s: %s\n",
6984                                                dev->name, strerror(errno));
6985                                 }
6986                                 free(path);
6987                                 dev = dev->next;
6988                         }
6989                         found = 0;
6990                         if (is_fd_valid(fd) && disk_attached_to_hba(fd, hba)) {
6991                                 struct mdstat_ent *vol;
6992                                 for (vol = mdstat ; vol ; vol = vol->next) {
6993                                         if (vol->active > 0 &&
6994                                             vol->metadata_version &&
6995                                             is_container_member(vol, memb->devnm)) {
6996                                                 found++;
6997                                                 count++;
6998                                         }
6999                                 }
7000                                 if (*devlist && (found < dpa)) {
7001                                         dv = xcalloc(1, sizeof(*dv));
7002                                         dv->devname = xmalloc(strlen(memb->devnm) + strlen("/dev/") + 1);
7003                                         sprintf(dv->devname, "%s%s", "/dev/", memb->devnm);
7004                                         dv->found = found;
7005                                         dv->used = 0;
7006                                         dv->next = *devlist;
7007                                         *devlist = dv;
7008                                 }
7009                         }
7010                         close_fd(&fd);
7011                 }
7012         }
7013         free_mdstat(mdstat);
7014         return count;
7015 }
7016
7017 #ifdef DEBUG_LOOP
7018 static struct md_list*
7019 get_loop_devices(void)
7020 {
7021         int i;
7022         struct md_list *devlist = NULL;
7023         struct md_list *dv;
7024
7025         for(i = 0; i < 12; i++) {
7026                 dv = xcalloc(1, sizeof(*dv));
7027                 dv->devname = xmalloc(40);
7028                 sprintf(dv->devname, "/dev/loop%d", i);
7029                 dv->next = devlist;
7030                 devlist = dv;
7031         }
7032         return devlist;
7033 }
7034 #endif
7035
7036 static struct md_list*
7037 get_devices(const char *hba_path)
7038 {
7039         struct md_list *devlist = NULL;
7040         struct md_list *dv;
7041         struct dirent *ent;
7042         DIR *dir;
7043         int err = 0;
7044
7045 #if DEBUG_LOOP
7046         devlist = get_loop_devices();
7047         return devlist;
7048 #endif
7049         /* scroll through /sys/dev/block looking for devices attached to
7050          * this hba
7051          */
7052         dir = opendir("/sys/dev/block");
7053         for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
7054                 int fd;
7055                 char buf[1024];
7056                 int major, minor;
7057                 char *path = NULL;
7058                 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
7059                         continue;
7060                 path = devt_to_devpath(makedev(major, minor), 1, NULL);
7061                 if (!path)
7062                         continue;
7063                 if (!path_attached_to_hba(path, hba_path)) {
7064                         free(path);
7065                         path = NULL;
7066                         continue;
7067                 }
7068                 free(path);
7069                 path = NULL;
7070                 fd = dev_open(ent->d_name, O_RDONLY);
7071                 if (is_fd_valid(fd)) {
7072                         fd2devname(fd, buf);
7073                         close(fd);
7074                 } else {
7075                         pr_err("cannot open device: %s\n",
7076                                 ent->d_name);
7077                         continue;
7078                 }
7079
7080                 dv = xcalloc(1, sizeof(*dv));
7081                 dv->devname = xstrdup(buf);
7082                 dv->next = devlist;
7083                 devlist = dv;
7084         }
7085         if (err) {
7086                 while(devlist) {
7087                         dv = devlist;
7088                         devlist = devlist->next;
7089                         free(dv->devname);
7090                         free(dv);
7091                 }
7092         }
7093         closedir(dir);
7094         return devlist;
7095 }
7096
7097 static int
7098 count_volumes_list(struct md_list *devlist, char *homehost,
7099                    int verbose, int *found)
7100 {
7101         struct md_list *tmpdev;
7102         int count = 0;
7103         struct supertype *st;
7104
7105         /* first walk the list of devices to find a consistent set
7106          * that match the criterea, if that is possible.
7107          * We flag the ones we like with 'used'.
7108          */
7109         *found = 0;
7110         st = match_metadata_desc_imsm("imsm");
7111         if (st == NULL) {
7112                 pr_vrb("cannot allocate memory for imsm supertype\n");
7113                 return 0;
7114         }
7115
7116         for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
7117                 char *devname = tmpdev->devname;
7118                 dev_t rdev;
7119                 struct supertype *tst;
7120                 int dfd;
7121                 if (tmpdev->used > 1)
7122                         continue;
7123                 tst = dup_super(st);
7124                 if (tst == NULL) {
7125                         pr_vrb("cannot allocate memory for imsm supertype\n");
7126                         goto err_1;
7127                 }
7128                 tmpdev->container = 0;
7129                 dfd = dev_open(devname, O_RDONLY|O_EXCL);
7130                 if (!is_fd_valid(dfd)) {
7131                         dprintf("cannot open device %s: %s\n",
7132                                 devname, strerror(errno));
7133                         tmpdev->used = 2;
7134                 } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
7135                         tmpdev->used = 2;
7136                 } else if (must_be_container(dfd)) {
7137                         struct supertype *cst;
7138                         cst = super_by_fd(dfd, NULL);
7139                         if (cst == NULL) {
7140                                 dprintf("cannot recognize container type %s\n",
7141                                         devname);
7142                                 tmpdev->used = 2;
7143                         } else if (tst->ss != st->ss) {
7144                                 dprintf("non-imsm container - ignore it: %s\n",
7145                                         devname);
7146                                 tmpdev->used = 2;
7147                         } else if (!tst->ss->load_container ||
7148                                    tst->ss->load_container(tst, dfd, NULL))
7149                                 tmpdev->used = 2;
7150                         else {
7151                                 tmpdev->container = 1;
7152                         }
7153                         if (cst)
7154                                 cst->ss->free_super(cst);
7155                 } else {
7156                         tmpdev->st_rdev = rdev;
7157                         if (tst->ss->load_super(tst,dfd, NULL)) {
7158                                 dprintf("no RAID superblock on %s\n",
7159                                         devname);
7160                                 tmpdev->used = 2;
7161                         } else if (tst->ss->compare_super == NULL) {
7162                                 dprintf("Cannot assemble %s metadata on %s\n",
7163                                         tst->ss->name, devname);
7164                                 tmpdev->used = 2;
7165                         }
7166                 }
7167                 close_fd(&dfd);
7168
7169                 if (tmpdev->used == 2 || tmpdev->used == 4) {
7170                         /* Ignore unrecognised devices during auto-assembly */
7171                         goto loop;
7172                 }
7173                 else {
7174                         struct mdinfo info;
7175                         tst->ss->getinfo_super(tst, &info, NULL);
7176
7177                         if (st->minor_version == -1)
7178                                 st->minor_version = tst->minor_version;
7179
7180                         if (memcmp(info.uuid, uuid_zero,
7181                                    sizeof(int[4])) == 0) {
7182                                 /* this is a floating spare.  It cannot define
7183                                  * an array unless there are no more arrays of
7184                                  * this type to be found.  It can be included
7185                                  * in an array of this type though.
7186                                  */
7187                                 tmpdev->used = 3;
7188                                 goto loop;
7189                         }
7190
7191                         if (st->ss != tst->ss ||
7192                             st->minor_version != tst->minor_version ||
7193                             st->ss->compare_super(st, tst, 1) != 0) {
7194                                 /* Some mismatch. If exactly one array matches this host,
7195                                  * we can resolve on that one.
7196                                  * Or, if we are auto assembling, we just ignore the second
7197                                  * for now.
7198                                  */
7199                                 dprintf("superblock on %s doesn't match others - assembly aborted\n",
7200                                         devname);
7201                                 goto loop;
7202                         }
7203                         tmpdev->used = 1;
7204                         *found = 1;
7205                         dprintf("found: devname: %s\n", devname);
7206                 }
7207         loop:
7208                 if (tst)
7209                         tst->ss->free_super(tst);
7210         }
7211         if (*found != 0) {
7212                 int err;
7213                 if ((err = load_super_imsm_all(st, -1, &st->sb, NULL, devlist, 0)) == 0) {
7214                         struct mdinfo *iter, *head = st->ss->container_content(st, NULL);
7215                         for (iter = head; iter; iter = iter->next) {
7216                                 dprintf("content->text_version: %s vol\n",
7217                                         iter->text_version);
7218                                 if (iter->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
7219                                         /* do not assemble arrays with unsupported
7220                                            configurations */
7221                                         dprintf("Cannot activate member %s.\n",
7222                                                 iter->text_version);
7223                                 } else
7224                                         count++;
7225                         }
7226                         sysfs_free(head);
7227
7228                 } else {
7229                         dprintf("No valid super block on device list: err: %d %p\n",
7230                                 err, st->sb);
7231                 }
7232         } else {
7233                 dprintf("no more devices to examine\n");
7234         }
7235
7236         for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
7237                 if (tmpdev->used == 1 && tmpdev->found) {
7238                         if (count) {
7239                                 if (count < tmpdev->found)
7240                                         count = 0;
7241                                 else
7242                                         count -= tmpdev->found;
7243                         }
7244                 }
7245                 if (tmpdev->used == 1)
7246                         tmpdev->used = 4;
7247         }
7248         err_1:
7249         if (st)
7250                 st->ss->free_super(st);
7251         return count;
7252 }
7253
7254 static int __count_volumes(char *hba_path, int dpa, int verbose,
7255                            int cmp_hba_path)
7256 {
7257         struct sys_dev *idev, *intel_devices = find_intel_devices();
7258         int count = 0;
7259         const struct orom_entry *entry;
7260         struct devid_list *dv, *devid_list;
7261
7262         if (!hba_path)
7263                 return 0;
7264
7265         for (idev = intel_devices; idev; idev = idev->next) {
7266                 if (strstr(idev->path, hba_path))
7267                         break;
7268         }
7269
7270         if (!idev || !idev->dev_id)
7271                 return 0;
7272
7273         entry = get_orom_entry_by_device_id(idev->dev_id);
7274
7275         if (!entry || !entry->devid_list)
7276                 return 0;
7277
7278         devid_list = entry->devid_list;
7279         for (dv = devid_list; dv; dv = dv->next) {
7280                 struct md_list *devlist;
7281                 struct sys_dev *device = NULL;
7282                 char *hpath;
7283                 int found = 0;
7284
7285                 if (cmp_hba_path)
7286                         device = device_by_id_and_path(dv->devid, hba_path);
7287                 else
7288                         device = device_by_id(dv->devid);
7289
7290                 if (device)
7291                         hpath = device->path;
7292                 else
7293                         return 0;
7294
7295                 devlist = get_devices(hpath);
7296                 /* if no intel devices return zero volumes */
7297                 if (devlist == NULL)
7298                         return 0;
7299
7300                 count += active_arrays_by_format("imsm", hpath, &devlist, dpa,
7301                                                  verbose);
7302                 dprintf("path: %s active arrays: %d\n", hpath, count);
7303                 if (devlist == NULL)
7304                         return 0;
7305                 do  {
7306                         found = 0;
7307                         count += count_volumes_list(devlist,
7308                                                         NULL,
7309                                                         verbose,
7310                                                         &found);
7311                         dprintf("found %d count: %d\n", found, count);
7312                 } while (found);
7313
7314                 dprintf("path: %s total number of volumes: %d\n", hpath, count);
7315
7316                 while (devlist) {
7317                         struct md_list *dv = devlist;
7318                         devlist = devlist->next;
7319                         free(dv->devname);
7320                         free(dv);
7321                 }
7322         }
7323         return count;
7324 }
7325
7326 static int count_volumes(struct intel_hba *hba, int dpa, int verbose)
7327 {
7328         if (!hba)
7329                 return 0;
7330         if (hba->type == SYS_DEV_VMD) {
7331                 struct sys_dev *dev;
7332                 int count = 0;
7333
7334                 for (dev = find_intel_devices(); dev; dev = dev->next) {
7335                         if (dev->type == SYS_DEV_VMD)
7336                                 count += __count_volumes(dev->path, dpa,
7337                                                          verbose, 1);
7338                 }
7339                 return count;
7340         }
7341         return __count_volumes(hba->path, dpa, verbose, 0);
7342 }
7343
7344 static int imsm_default_chunk(const struct imsm_orom *orom)
7345 {
7346         /* up to 512 if the plaform supports it, otherwise the platform max.
7347          * 128 if no platform detected
7348          */
7349         int fs = max(7, orom ? fls(orom->sss) : 0);
7350
7351         return min(512, (1 << fs));
7352 }
7353
7354 static int
7355 validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
7356                             int raiddisks, int *chunk, unsigned long long size, int verbose)
7357 {
7358         /* check/set platform and metadata limits/defaults */
7359         if (super->orom && raiddisks > super->orom->dpa) {
7360                 pr_vrb("platform supports a maximum of %d disks per array\n",
7361                        super->orom->dpa);
7362                 return 0;
7363         }
7364
7365         /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
7366         if (!is_raid_level_supported(super->orom, level, raiddisks)) {
7367                 pr_vrb("platform does not support raid%d with %d disk%s\n",
7368                         level, raiddisks, raiddisks > 1 ? "s" : "");
7369                 return 0;
7370         }
7371
7372         if (*chunk == 0 || *chunk == UnSet)
7373                 *chunk = imsm_default_chunk(super->orom);
7374
7375         if (super->orom && !imsm_orom_has_chunk(super->orom, *chunk)) {
7376                 pr_vrb("platform does not support a chunk size of: %d\n", *chunk);
7377                 return 0;
7378         }
7379
7380         if (layout != imsm_level_to_layout(level)) {
7381                 if (level == 5)
7382                         pr_vrb("imsm raid 5 only supports the left-asymmetric layout\n");
7383                 else if (level == 10)
7384                         pr_vrb("imsm raid 10 only supports the n2 layout\n");
7385                 else
7386                         pr_vrb("imsm unknown layout %#x for this raid level %d\n",
7387                                 layout, level);
7388                 return 0;
7389         }
7390
7391         if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 &&
7392                         (calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) {
7393                 pr_vrb("platform does not support a volume size over 2TB\n");
7394                 return 0;
7395         }
7396
7397         return 1;
7398 }
7399
7400 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
7401  * FIX ME add ahci details
7402  */
7403 static int validate_geometry_imsm_volume(struct supertype *st, int level,
7404                                          int layout, int raiddisks, int *chunk,
7405                                          unsigned long long size,
7406                                          unsigned long long data_offset,
7407                                          char *dev,
7408                                          unsigned long long *freesize,
7409                                          int verbose)
7410 {
7411         dev_t rdev;
7412         struct intel_super *super = st->sb;
7413         struct imsm_super *mpb;
7414         struct dl *dl;
7415         unsigned long long pos = 0;
7416         unsigned long long maxsize;
7417         struct extent *e;
7418         int i;
7419
7420         /* We must have the container info already read in. */
7421         if (!super)
7422                 return 0;
7423
7424         mpb = super->anchor;
7425
7426         if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) {
7427                 pr_err("RAID geometry validation failed. Cannot proceed with the action(s).\n");
7428                 return 0;
7429         }
7430         if (!dev) {
7431                 /* General test:  make sure there is space for
7432                  * 'raiddisks' device extents of size 'size' at a given
7433                  * offset
7434                  */
7435                 unsigned long long minsize = size;
7436                 unsigned long long start_offset = MaxSector;
7437                 int dcnt = 0;
7438                 if (minsize == 0)
7439                         minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
7440                 for (dl = super->disks; dl ; dl = dl->next) {
7441                         int found = 0;
7442
7443                         pos = 0;
7444                         i = 0;
7445                         e = get_extents(super, dl, 0);
7446                         if (!e) continue;
7447                         do {
7448                                 unsigned long long esize;
7449                                 esize = e[i].start - pos;
7450                                 if (esize >= minsize)
7451                                         found = 1;
7452                                 if (found && start_offset == MaxSector) {
7453                                         start_offset = pos;
7454                                         break;
7455                                 } else if (found && pos != start_offset) {
7456                                         found = 0;
7457                                         break;
7458                                 }
7459                                 pos = e[i].start + e[i].size;
7460                                 i++;
7461                         } while (e[i-1].size);
7462                         if (found)
7463                                 dcnt++;
7464                         free(e);
7465                 }
7466                 if (dcnt < raiddisks) {
7467                         if (verbose)
7468                                 pr_err("imsm: Not enough devices with space for this array (%d < %d)\n",
7469                                         dcnt, raiddisks);
7470                         return 0;
7471                 }
7472                 return 1;
7473         }
7474
7475         /* This device must be a member of the set */
7476         if (!stat_is_blkdev(dev, &rdev))
7477                 return 0;
7478         for (dl = super->disks ; dl ; dl = dl->next) {
7479                 if (dl->major == (int)major(rdev) &&
7480                     dl->minor == (int)minor(rdev))
7481                         break;
7482         }
7483         if (!dl) {
7484                 if (verbose)
7485                         pr_err("%s is not in the same imsm set\n", dev);
7486                 return 0;
7487         } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
7488                 /* If a volume is present then the current creation attempt
7489                  * cannot incorporate new spares because the orom may not
7490                  * understand this configuration (all member disks must be
7491                  * members of each array in the container).
7492                  */
7493                 pr_err("%s is a spare and a volume is already defined for this container\n", dev);
7494                 pr_err("The option-rom requires all member disks to be a member of all volumes\n");
7495                 return 0;
7496         } else if (super->orom && mpb->num_raid_devs > 0 &&
7497                    mpb->num_disks != raiddisks) {
7498                 pr_err("The option-rom requires all member disks to be a member of all volumes\n");
7499                 return 0;
7500         }
7501
7502         /* retrieve the largest free space block */
7503         e = get_extents(super, dl, 0);
7504         maxsize = 0;
7505         i = 0;
7506         if (e) {
7507                 do {
7508                         unsigned long long esize;
7509
7510                         esize = e[i].start - pos;
7511                         if (esize >= maxsize)
7512                                 maxsize = esize;
7513                         pos = e[i].start + e[i].size;
7514                         i++;
7515                 } while (e[i-1].size);
7516                 dl->e = e;
7517                 dl->extent_cnt = i;
7518         } else {
7519                 if (verbose)
7520                         pr_err("unable to determine free space for: %s\n",
7521                                 dev);
7522                 return 0;
7523         }
7524         if (maxsize < size) {
7525                 if (verbose)
7526                         pr_err("%s not enough space (%llu < %llu)\n",
7527                                 dev, maxsize, size);
7528                 return 0;
7529         }
7530
7531         maxsize = merge_extents(super, false);
7532
7533         if (mpb->num_raid_devs > 0 && size && size != maxsize)
7534                 pr_err("attempting to create a second volume with size less then remaining space.\n");
7535
7536         if (maxsize < size || maxsize == 0) {
7537                 if (verbose) {
7538                         if (maxsize == 0)
7539                                 pr_err("no free space left on device. Aborting...\n");
7540                         else
7541                                 pr_err("not enough space to create volume of given size (%llu < %llu). Aborting...\n",
7542                                                 maxsize, size);
7543                 }
7544                 return 0;
7545         }
7546
7547         *freesize = maxsize;
7548
7549         if (super->orom) {
7550                 int count = count_volumes(super->hba,
7551                                       super->orom->dpa, verbose);
7552                 if (super->orom->vphba <= count) {
7553                         pr_vrb("platform does not support more than %d raid volumes.\n",
7554                                super->orom->vphba);
7555                         return 0;
7556                 }
7557         }
7558         return 1;
7559 }
7560
7561 /**
7562  * imsm_get_free_size() - get the biggest, common free space from members.
7563  * @super: &intel_super pointer, not NULL.
7564  * @raiddisks: number of raid disks.
7565  * @size: requested size, could be 0 (means max size).
7566  * @chunk: requested chunk size in KiB.
7567  * @freesize: pointer for returned size value.
7568  *
7569  * Return: &IMSM_STATUS_OK or &IMSM_STATUS_ERROR.
7570  *
7571  * @freesize is set to meaningful value, this can be @size, or calculated
7572  * max free size.
7573  * super->create_offset value is modified and set appropriately in
7574  * merge_extends() for further creation.
7575  */
7576 static imsm_status_t imsm_get_free_size(struct intel_super *super,
7577                                         const int raiddisks,
7578                                         unsigned long long size,
7579                                         const int chunk,
7580                                         unsigned long long *freesize,
7581                                         bool expanding)
7582 {
7583         struct imsm_super *mpb = super->anchor;
7584         struct dl *dl;
7585         int i;
7586         struct extent *e;
7587         int cnt = 0;
7588         int used = 0;
7589         unsigned long long maxsize;
7590         unsigned long long minsize = size;
7591
7592         if (minsize == 0)
7593                 minsize = chunk * 2;
7594
7595         /* find the largest common start free region of the possible disks */
7596         for (dl = super->disks; dl; dl = dl->next) {
7597                 dl->raiddisk = -1;
7598
7599                 if (dl->index >= 0)
7600                         used++;
7601
7602                 /* don't activate new spares if we are orom constrained
7603                  * and there is already a volume active in the container
7604                  */
7605                 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
7606                         continue;
7607
7608                 e = get_extents(super, dl, 0);
7609                 if (!e)
7610                         continue;
7611                 for (i = 1; e[i-1].size; i++)
7612                         ;
7613                 dl->e = e;
7614                 dl->extent_cnt = i;
7615                 cnt++;
7616         }
7617
7618         maxsize = merge_extents(super, expanding);
7619         if (maxsize < minsize)  {
7620                 pr_err("imsm: Free space is %llu but must be equal or larger than %llu.\n",
7621                        maxsize, minsize);
7622                 return IMSM_STATUS_ERROR;
7623         }
7624
7625         if (cnt < raiddisks || (super->orom && used && used != raiddisks)) {
7626                 pr_err("imsm: Not enough devices with space to create array.\n");
7627                 return IMSM_STATUS_ERROR;
7628         }
7629
7630         if (size == 0) {
7631                 size = maxsize;
7632                 if (chunk) {
7633                         size /= 2 * chunk;
7634                         size *= 2 * chunk;
7635                 }
7636                 maxsize = size;
7637         }
7638         if (mpb->num_raid_devs > 0 && size && size != maxsize)
7639                 pr_err("attempting to create a second volume with size less then remaining space.\n");
7640         *freesize = size;
7641
7642         dprintf("imsm: imsm_get_free_size() returns : %llu\n", size);
7643
7644         return IMSM_STATUS_OK;
7645 }
7646
7647 /**
7648  * autolayout_imsm() - automatically layout a new volume.
7649  * @super: &intel_super pointer, not NULL.
7650  * @raiddisks: number of raid disks.
7651  * @size: requested size, could be 0 (means max size).
7652  * @chunk: requested chunk.
7653  * @freesize: pointer for returned size value.
7654  *
7655  * We are being asked to automatically layout a new volume based on the current
7656  * contents of the container. If the parameters can be satisfied autolayout_imsm
7657  * will record the disks, start offset, and will return size of the volume to
7658  * be created. See imsm_get_free_size() for details.
7659  * add_to_super() and getinfo_super() detect when autolayout is in progress.
7660  * If first volume exists, slots are set consistently to it.
7661  *
7662  * Return: &IMSM_STATUS_OK on success, &IMSM_STATUS_ERROR otherwise.
7663  *
7664  * Disks are marked for creation via dl->raiddisk.
7665  */
7666 static imsm_status_t autolayout_imsm(struct intel_super *super,
7667                                      const int raiddisks,
7668                                      unsigned long long size, const int chunk,
7669                                      unsigned long long *freesize)
7670 {
7671         int curr_slot = 0;
7672         struct dl *disk;
7673         int vol_cnt = super->anchor->num_raid_devs;
7674         imsm_status_t rv;
7675
7676         rv = imsm_get_free_size(super, raiddisks, size, chunk, freesize, false);
7677         if (rv != IMSM_STATUS_OK)
7678                 return IMSM_STATUS_ERROR;
7679
7680         for (disk = super->disks; disk; disk = disk->next) {
7681                 if (!disk->e)
7682                         continue;
7683
7684                 if (curr_slot == raiddisks)
7685                         break;
7686
7687                 if (vol_cnt == 0) {
7688                         disk->raiddisk = curr_slot;
7689                 } else {
7690                         int _slot = get_disk_slot_in_dev(super, 0, disk->index);
7691
7692                         if (_slot == -1) {
7693                                 pr_err("Disk %s is not used in first volume, aborting\n",
7694                                        disk->devname);
7695                                 return IMSM_STATUS_ERROR;
7696                         }
7697                         disk->raiddisk = _slot;
7698                 }
7699                 curr_slot++;
7700         }
7701
7702         return IMSM_STATUS_OK;
7703 }
7704
7705 static int validate_geometry_imsm(struct supertype *st, int level, int layout,
7706                                   int raiddisks, int *chunk, unsigned long long size,
7707                                   unsigned long long data_offset,
7708                                   char *dev, unsigned long long *freesize,
7709                                   int consistency_policy, int verbose)
7710 {
7711         int fd, cfd;
7712         struct mdinfo *sra;
7713         int is_member = 0;
7714
7715         /* load capability
7716          * if given unused devices create a container
7717          * if given given devices in a container create a member volume
7718          */
7719         if (is_container(level))
7720                 /* Must be a fresh device to add to a container */
7721                 return validate_geometry_imsm_container(st, level, raiddisks,
7722                                                         data_offset, dev,
7723                                                         freesize, verbose);
7724
7725         /*
7726          * Size is given in sectors.
7727          */
7728         if (size && (size < 2048)) {
7729                 pr_err("Given size must be greater than 1M.\n");
7730                 /* Depends on algorithm in Create.c :
7731                  * if container was given (dev == NULL) return -1,
7732                  * if block device was given ( dev != NULL) return 0.
7733                  */
7734                 return dev ? -1 : 0;
7735         }
7736
7737         if (!dev) {
7738                 struct intel_super *super = st->sb;
7739
7740                 /*
7741                  * Autolayout mode, st->sb must be set.
7742                  */
7743                 if (!super) {
7744                         pr_vrb("superblock must be set for autolayout, aborting\n");
7745                         return 0;
7746                 }
7747
7748                 if (!validate_geometry_imsm_orom(st->sb, level, layout,
7749                                                  raiddisks, chunk, size,
7750                                                  verbose))
7751                         return 0;
7752
7753                 if (super->orom && freesize) {
7754                         imsm_status_t rv;
7755                         int count = count_volumes(super->hba, super->orom->dpa,
7756                                               verbose);
7757                         if (super->orom->vphba <= count) {
7758                                 pr_vrb("platform does not support more than %d raid volumes.\n",
7759                                        super->orom->vphba);
7760                                 return 0;
7761                         }
7762
7763                         rv = autolayout_imsm(super, raiddisks, size, *chunk,
7764                                              freesize);
7765                         if (rv != IMSM_STATUS_OK)
7766                                 return 0;
7767                 }
7768                 return 1;
7769         }
7770         if (st->sb) {
7771                 /* creating in a given container */
7772                 return validate_geometry_imsm_volume(st, level, layout,
7773                                                      raiddisks, chunk, size,
7774                                                      data_offset,
7775                                                      dev, freesize, verbose);
7776         }
7777
7778         /* This device needs to be a device in an 'imsm' container */
7779         fd = open(dev, O_RDONLY|O_EXCL, 0);
7780
7781         if (is_fd_valid(fd)) {
7782                 pr_vrb("Cannot create this array on device %s\n", dev);
7783                 close(fd);
7784                 return 0;
7785         }
7786         if (errno == EBUSY)
7787                 fd = open(dev, O_RDONLY, 0);
7788
7789         if (!is_fd_valid(fd)) {
7790                 pr_vrb("Cannot open %s: %s\n", dev, strerror(errno));
7791                 return 0;
7792         }
7793
7794         /* Well, it is in use by someone, maybe an 'imsm' container. */
7795         cfd = open_container(fd);
7796         close_fd(&fd);
7797
7798         if (!is_fd_valid(cfd)) {
7799                 pr_vrb("Cannot use %s: It is busy\n", dev);
7800                 return 0;
7801         }
7802         sra = sysfs_read(cfd, NULL, GET_VERSION);
7803         if (sra && sra->array.major_version == -1 &&
7804             strcmp(sra->text_version, "imsm") == 0)
7805                 is_member = 1;
7806         sysfs_free(sra);
7807         if (is_member) {
7808                 /* This is a member of a imsm container.  Load the container
7809                  * and try to create a volume
7810                  */
7811                 struct intel_super *super;
7812
7813                 if (load_super_imsm_all(st, cfd, (void **) &super, NULL, NULL, 1) == 0) {
7814                         st->sb = super;
7815                         strcpy(st->container_devnm, fd2devnm(cfd));
7816                         close(cfd);
7817                         return validate_geometry_imsm_volume(st, level, layout,
7818                                                              raiddisks, chunk,
7819                                                              size, data_offset, dev,
7820                                                              freesize, 1)
7821                                 ? 1 : -1;
7822                 }
7823         }
7824
7825         if (verbose)
7826                 pr_err("failed container membership check\n");
7827
7828         close(cfd);
7829         return 0;
7830 }
7831
7832 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
7833 {
7834         struct intel_super *super = st->sb;
7835
7836         if (level && *level == UnSet)
7837                 *level = LEVEL_CONTAINER;
7838
7839         if (level && layout && *layout == UnSet)
7840                 *layout = imsm_level_to_layout(*level);
7841
7842         if (chunk && (*chunk == UnSet || *chunk == 0))
7843                 *chunk = imsm_default_chunk(super->orom);
7844 }
7845
7846 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
7847
7848 static int kill_subarray_imsm(struct supertype *st, char *subarray_id)
7849 {
7850         /* remove the subarray currently referenced by subarray_id */
7851         __u8 i;
7852         struct intel_dev **dp;
7853         struct intel_super *super = st->sb;
7854         __u8 current_vol = strtoul(subarray_id, NULL, 10);
7855         struct imsm_super *mpb = super->anchor;
7856
7857         if (mpb->num_raid_devs == 0)
7858                 return 2;
7859
7860         /* block deletions that would change the uuid of active subarrays
7861          *
7862          * FIXME when immutable ids are available, but note that we'll
7863          * also need to fixup the invalidated/active subarray indexes in
7864          * mdstat
7865          */
7866         for (i = 0; i < mpb->num_raid_devs; i++) {
7867                 char subarray[4];
7868
7869                 if (i < current_vol)
7870                         continue;
7871                 snprintf(subarray, sizeof(subarray), "%u", i);
7872                 if (is_subarray_active(subarray, st->devnm)) {
7873                         pr_err("deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
7874                                current_vol, i);
7875
7876                         return 2;
7877                 }
7878         }
7879
7880         if (st->update_tail) {
7881                 struct imsm_update_kill_array *u = xmalloc(sizeof(*u));
7882
7883                 u->type = update_kill_array;
7884                 u->dev_idx = current_vol;
7885                 append_metadata_update(st, u, sizeof(*u));
7886
7887                 return 0;
7888         }
7889
7890         for (dp = &super->devlist; *dp;)
7891                 if ((*dp)->index == current_vol) {
7892                         *dp = (*dp)->next;
7893                 } else {
7894                         handle_missing(super, (*dp)->dev);
7895                         if ((*dp)->index > current_vol)
7896                                 (*dp)->index--;
7897                         dp = &(*dp)->next;
7898                 }
7899
7900         /* no more raid devices, all active components are now spares,
7901          * but of course failed are still failed
7902          */
7903         if (--mpb->num_raid_devs == 0) {
7904                 struct dl *d;
7905
7906                 for (d = super->disks; d; d = d->next)
7907                         if (d->index > -2)
7908                                 mark_spare(d);
7909         }
7910
7911         super->updates_pending++;
7912
7913         return 0;
7914 }
7915
7916 /**
7917  * get_rwh_policy_from_update() - Get the rwh policy for update option.
7918  * @update: Update option.
7919  */
7920 static int get_rwh_policy_from_update(enum update_opt update)
7921 {
7922         switch (update) {
7923         case UOPT_PPL:
7924                 return RWH_MULTIPLE_DISTRIBUTED;
7925         case UOPT_NO_PPL:
7926                 return RWH_MULTIPLE_OFF;
7927         case UOPT_BITMAP:
7928                 return RWH_BITMAP;
7929         case UOPT_NO_BITMAP:
7930                 return RWH_OFF;
7931         default:
7932                 break;
7933         }
7934         return UOPT_UNDEFINED;
7935 }
7936
7937 static int update_subarray_imsm(struct supertype *st, char *subarray,
7938                                 enum update_opt update, struct mddev_ident *ident)
7939 {
7940         /* update the subarray currently referenced by ->current_vol */
7941         struct intel_super *super = st->sb;
7942         struct imsm_super *mpb = super->anchor;
7943
7944         if (update == UOPT_NAME) {
7945                 char *name = ident->name;
7946                 char *ep;
7947                 int vol;
7948
7949                 if (imsm_is_name_allowed(super, name, 1) == false)
7950                         return 2;
7951
7952                 vol = strtoul(subarray, &ep, 10);
7953                 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
7954                         return 2;
7955
7956                 if (st->update_tail) {
7957                         struct imsm_update_rename_array *u = xmalloc(sizeof(*u));
7958
7959                         u->type = update_rename_array;
7960                         u->dev_idx = vol;
7961                         strncpy((char *) u->name, name, MAX_RAID_SERIAL_LEN);
7962                         u->name[MAX_RAID_SERIAL_LEN-1] = '\0';
7963                         append_metadata_update(st, u, sizeof(*u));
7964                 } else {
7965                         struct imsm_dev *dev;
7966                         int i, namelen;
7967
7968                         dev = get_imsm_dev(super, vol);
7969                         memset(dev->volume, '\0', MAX_RAID_SERIAL_LEN);
7970                         namelen = min((int)strlen(name), MAX_RAID_SERIAL_LEN);
7971                         memcpy(dev->volume, name, namelen);
7972                         for (i = 0; i < mpb->num_raid_devs; i++) {
7973                                 dev = get_imsm_dev(super, i);
7974                                 handle_missing(super, dev);
7975                         }
7976                         super->updates_pending++;
7977                 }
7978         } else if (get_rwh_policy_from_update(update) != UOPT_UNDEFINED) {
7979                 int new_policy;
7980                 char *ep;
7981                 int vol = strtoul(subarray, &ep, 10);
7982
7983                 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
7984                         return 2;
7985
7986                 new_policy = get_rwh_policy_from_update(update);
7987
7988                 if (st->update_tail) {
7989                         struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
7990
7991                         u->type = update_rwh_policy;
7992                         u->dev_idx = vol;
7993                         u->new_policy = new_policy;
7994                         append_metadata_update(st, u, sizeof(*u));
7995                 } else {
7996                         struct imsm_dev *dev;
7997
7998                         dev = get_imsm_dev(super, vol);
7999                         dev->rwh_policy = new_policy;
8000                         super->updates_pending++;
8001                 }
8002                 if (new_policy == RWH_BITMAP)
8003                         return write_init_bitmap_imsm_vol(st, vol);
8004         } else
8005                 return 2;
8006
8007         return 0;
8008 }
8009
8010 static bool is_gen_migration(struct imsm_dev *dev)
8011 {
8012         if (dev && dev->vol.migr_state &&
8013             migr_type(dev) == MIGR_GEN_MIGR)
8014                 return true;
8015
8016         return false;
8017 }
8018
8019 static int is_rebuilding(struct imsm_dev *dev)
8020 {
8021         struct imsm_map *migr_map;
8022
8023         if (!dev->vol.migr_state)
8024                 return 0;
8025
8026         if (migr_type(dev) != MIGR_REBUILD)
8027                 return 0;
8028
8029         migr_map = get_imsm_map(dev, MAP_1);
8030
8031         if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
8032                 return 1;
8033         else
8034                 return 0;
8035 }
8036
8037 static int is_initializing(struct imsm_dev *dev)
8038 {
8039         struct imsm_map *migr_map;
8040
8041         if (!dev->vol.migr_state)
8042                 return 0;
8043
8044         if (migr_type(dev) != MIGR_INIT)
8045                 return 0;
8046
8047         migr_map = get_imsm_map(dev, MAP_1);
8048
8049         if (migr_map->map_state == IMSM_T_STATE_UNINITIALIZED)
8050                 return 1;
8051
8052         return 0;
8053 }
8054
8055 static void update_recovery_start(struct intel_super *super,
8056                                         struct imsm_dev *dev,
8057                                         struct mdinfo *array)
8058 {
8059         struct mdinfo *rebuild = NULL;
8060         struct mdinfo *d;
8061         __u32 units;
8062
8063         if (!is_rebuilding(dev))
8064                 return;
8065
8066         /* Find the rebuild target, but punt on the dual rebuild case */
8067         for (d = array->devs; d; d = d->next)
8068                 if (d->recovery_start == 0) {
8069                         if (rebuild)
8070                                 return;
8071                         rebuild = d;
8072                 }
8073
8074         if (!rebuild) {
8075                 /* (?) none of the disks are marked with
8076                  * IMSM_ORD_REBUILD, so assume they are missing and the
8077                  * disk_ord_tbl was not correctly updated
8078                  */
8079                 dprintf("failed to locate out-of-sync disk\n");
8080                 return;
8081         }
8082
8083         units = vol_curr_migr_unit(dev);
8084         rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
8085 }
8086
8087 static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
8088
8089 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
8090 {
8091         /* Given a container loaded by load_super_imsm_all,
8092          * extract information about all the arrays into
8093          * an mdinfo tree.
8094          * If 'subarray' is given, just extract info about that array.
8095          *
8096          * For each imsm_dev create an mdinfo, fill it in,
8097          *  then look for matching devices in super->disks
8098          *  and create appropriate device mdinfo.
8099          */
8100         struct intel_super *super = st->sb;
8101         struct imsm_super *mpb = super->anchor;
8102         struct mdinfo *rest = NULL;
8103         unsigned int i;
8104         int sb_errors = 0;
8105         struct dl *d;
8106         int spare_disks = 0;
8107         int current_vol = super->current_vol;
8108
8109         /* do not assemble arrays when not all attributes are supported */
8110         if (imsm_check_attributes(mpb->attributes) == 0) {
8111                 sb_errors = 1;
8112                 pr_err("Unsupported attributes in IMSM metadata.Arrays activation is blocked.\n");
8113         }
8114
8115         /* count spare devices, not used in maps
8116          */
8117         for (d = super->disks; d; d = d->next)
8118                 if (d->index == -1)
8119                         spare_disks++;
8120
8121         for (i = 0; i < mpb->num_raid_devs; i++) {
8122                 struct imsm_dev *dev;
8123                 struct imsm_map *map;
8124                 struct imsm_map *map2;
8125                 struct mdinfo *this;
8126                 int slot;
8127                 int chunk;
8128                 char *ep;
8129                 int level;
8130
8131                 if (subarray &&
8132                     (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
8133                         continue;
8134
8135                 dev = get_imsm_dev(super, i);
8136                 map = get_imsm_map(dev, MAP_0);
8137                 map2 = get_imsm_map(dev, MAP_1);
8138                 level = get_imsm_raid_level(map);
8139
8140                 /* do not publish arrays that are in the middle of an
8141                  * unsupported migration
8142                  */
8143                 if (dev->vol.migr_state &&
8144                     (migr_type(dev) == MIGR_STATE_CHANGE)) {
8145                         pr_err("cannot assemble volume '%.16s': unsupported migration in progress\n",
8146                                 dev->volume);
8147                         continue;
8148                 }
8149                 /* do not publish arrays that are not support by controller's
8150                  * OROM/EFI
8151                  */
8152
8153                 this = xmalloc(sizeof(*this));
8154
8155                 super->current_vol = i;
8156                 getinfo_super_imsm_volume(st, this, NULL);
8157                 this->next = rest;
8158                 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
8159                 /* mdadm does not support all metadata features- set the bit in all arrays state */
8160                 if (!validate_geometry_imsm_orom(super,
8161                                                  level, /* RAID level */
8162                                                  imsm_level_to_layout(level),
8163                                                  map->num_members, /* raid disks */
8164                                                  &chunk, imsm_dev_size(dev),
8165                                                  1 /* verbose */)) {
8166                         pr_err("IMSM RAID geometry validation failed.  Array %s activation is blocked.\n",
8167                                 dev->volume);
8168                         this->array.state |=
8169                           (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
8170                           (1<<MD_SB_BLOCK_VOLUME);
8171                 }
8172
8173                 /* if array has bad blocks, set suitable bit in all arrays state */
8174                 if (sb_errors)
8175                         this->array.state |=
8176                           (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
8177                           (1<<MD_SB_BLOCK_VOLUME);
8178
8179                 for (slot = 0 ; slot <  map->num_members; slot++) {
8180                         unsigned long long recovery_start;
8181                         struct mdinfo *info_d;
8182                         struct dl *d;
8183                         int idx;
8184                         int skip;
8185                         __u32 ord;
8186                         int missing = 0;
8187
8188                         skip = 0;
8189                         idx = get_imsm_disk_idx(dev, slot, MAP_0);
8190                         ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
8191                         for (d = super->disks; d ; d = d->next)
8192                                 if (d->index == idx)
8193                                         break;
8194
8195                         recovery_start = MaxSector;
8196                         if (d == NULL)
8197                                 skip = 1;
8198                         if (d && is_failed(&d->disk))
8199                                 skip = 1;
8200                         if (!skip && (ord & IMSM_ORD_REBUILD))
8201                                 recovery_start = 0;
8202                         if (!(ord & IMSM_ORD_REBUILD))
8203                                 this->array.working_disks++;
8204                         /*
8205                          * if we skip some disks the array will be assmebled degraded;
8206                          * reset resync start to avoid a dirty-degraded
8207                          * situation when performing the intial sync
8208                          */
8209                         if (skip)
8210                                 missing++;
8211
8212                         if (!(dev->vol.dirty & RAIDVOL_DIRTY)) {
8213                                 if ((!able_to_resync(level, missing) ||
8214                                      recovery_start == 0))
8215                                         this->resync_start = MaxSector;
8216                         }
8217
8218                         if (skip)
8219                                 continue;
8220
8221                         info_d = xcalloc(1, sizeof(*info_d));
8222                         info_d->next = this->devs;
8223                         this->devs = info_d;
8224
8225                         info_d->disk.number = d->index;
8226                         info_d->disk.major = d->major;
8227                         info_d->disk.minor = d->minor;
8228                         info_d->disk.raid_disk = slot;
8229                         info_d->recovery_start = recovery_start;
8230                         if (map2) {
8231                                 if (slot < map2->num_members)
8232                                         info_d->disk.state = (1 << MD_DISK_ACTIVE);
8233                                 else
8234                                         this->array.spare_disks++;
8235                         } else {
8236                                 if (slot < map->num_members)
8237                                         info_d->disk.state = (1 << MD_DISK_ACTIVE);
8238                                 else
8239                                         this->array.spare_disks++;
8240                         }
8241
8242                         info_d->events = __le32_to_cpu(mpb->generation_num);
8243                         info_d->data_offset = pba_of_lba0(map);
8244                         info_d->component_size = calc_component_size(map, dev);
8245
8246                         if (map->raid_level == 5) {
8247                                 info_d->ppl_sector = this->ppl_sector;
8248                                 info_d->ppl_size = this->ppl_size;
8249                                 if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
8250                                     recovery_start == 0)
8251                                         this->resync_start = 0;
8252                         }
8253
8254                         info_d->bb.supported = 1;
8255                         get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
8256                                              info_d->data_offset,
8257                                              info_d->component_size,
8258                                              &info_d->bb);
8259                 }
8260                 /* now that the disk list is up-to-date fixup recovery_start */
8261                 update_recovery_start(super, dev, this);
8262                 this->array.spare_disks += spare_disks;
8263
8264                 /* check for reshape */
8265                 if (this->reshape_active == 1)
8266                         recover_backup_imsm(st, this);
8267                 rest = this;
8268         }
8269
8270         super->current_vol = current_vol;
8271         return rest;
8272 }
8273
8274 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
8275                                 int failed, int look_in_map)
8276 {
8277         struct imsm_map *map;
8278
8279         map = get_imsm_map(dev, look_in_map);
8280
8281         if (!failed)
8282                 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
8283                         IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
8284
8285         switch (get_imsm_raid_level(map)) {
8286         case 0:
8287                 return IMSM_T_STATE_FAILED;
8288                 break;
8289         case 1:
8290                 if (failed < map->num_members)
8291                         return IMSM_T_STATE_DEGRADED;
8292                 else
8293                         return IMSM_T_STATE_FAILED;
8294                 break;
8295         case 10:
8296         {
8297                 /**
8298                  * check to see if any mirrors have failed, otherwise we
8299                  * are degraded.  Even numbered slots are mirrored on
8300                  * slot+1
8301                  */
8302                 int i;
8303                 /* gcc -Os complains that this is unused */
8304                 int insync = insync;
8305
8306                 for (i = 0; i < map->num_members; i++) {
8307                         __u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X);
8308                         int idx = ord_to_idx(ord);
8309                         struct imsm_disk *disk;
8310
8311                         /* reset the potential in-sync count on even-numbered
8312                          * slots.  num_copies is always 2 for imsm raid10
8313                          */
8314                         if ((i & 1) == 0)
8315                                 insync = 2;
8316
8317                         disk = get_imsm_disk(super, idx);
8318                         if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
8319                                 insync--;
8320
8321                         /* no in-sync disks left in this mirror the
8322                          * array has failed
8323                          */
8324                         if (insync == 0)
8325                                 return IMSM_T_STATE_FAILED;
8326                 }
8327
8328                 return IMSM_T_STATE_DEGRADED;
8329         }
8330         case 5:
8331                 if (failed < 2)
8332                         return IMSM_T_STATE_DEGRADED;
8333                 else
8334                         return IMSM_T_STATE_FAILED;
8335                 break;
8336         default:
8337                 break;
8338         }
8339
8340         return map->map_state;
8341 }
8342
8343 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
8344                              int look_in_map)
8345 {
8346         int i;
8347         int failed = 0;
8348         struct imsm_disk *disk;
8349         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8350         struct imsm_map *prev = get_imsm_map(dev, MAP_1);
8351         struct imsm_map *map_for_loop;
8352         __u32 ord;
8353         int idx;
8354         int idx_1;
8355
8356         /* at the beginning of migration we set IMSM_ORD_REBUILD on
8357          * disks that are being rebuilt.  New failures are recorded to
8358          * map[0].  So we look through all the disks we started with and
8359          * see if any failures are still present, or if any new ones
8360          * have arrived
8361          */
8362         map_for_loop = map;
8363         if (prev && (map->num_members < prev->num_members))
8364                 map_for_loop = prev;
8365
8366         for (i = 0; i < map_for_loop->num_members; i++) {
8367                 idx_1 = -255;
8368                 /* when MAP_X is passed both maps failures are counted
8369                  */
8370                 if (prev &&
8371                     (look_in_map == MAP_1 || look_in_map == MAP_X) &&
8372                     i < prev->num_members) {
8373                         ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
8374                         idx_1 = ord_to_idx(ord);
8375
8376                         disk = get_imsm_disk(super, idx_1);
8377                         if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
8378                                 failed++;
8379                 }
8380                 if ((look_in_map == MAP_0 || look_in_map == MAP_X) &&
8381                     i < map->num_members) {
8382                         ord = __le32_to_cpu(map->disk_ord_tbl[i]);
8383                         idx = ord_to_idx(ord);
8384
8385                         if (idx != idx_1) {
8386                                 disk = get_imsm_disk(super, idx);
8387                                 if (!disk || is_failed(disk) ||
8388                                     ord & IMSM_ORD_REBUILD)
8389                                         failed++;
8390                         }
8391                 }
8392         }
8393
8394         return failed;
8395 }
8396
8397 static int imsm_open_new(struct supertype *c, struct active_array *a,
8398                          int inst)
8399 {
8400         struct intel_super *super = c->sb;
8401         struct imsm_super *mpb = super->anchor;
8402         struct imsm_update_prealloc_bb_mem u;
8403
8404         if (inst >= mpb->num_raid_devs) {
8405                 pr_err("subarry index %d, out of range\n", inst);
8406                 return -ENODEV;
8407         }
8408
8409         dprintf("imsm: open_new %d\n", inst);
8410         a->info.container_member = inst;
8411
8412         u.type = update_prealloc_badblocks_mem;
8413         imsm_update_metadata_locally(c, &u, sizeof(u));
8414
8415         return 0;
8416 }
8417
8418 static int is_resyncing(struct imsm_dev *dev)
8419 {
8420         struct imsm_map *migr_map;
8421
8422         if (!dev->vol.migr_state)
8423                 return 0;
8424
8425         if (migr_type(dev) == MIGR_INIT ||
8426             migr_type(dev) == MIGR_REPAIR)
8427                 return 1;
8428
8429         if (migr_type(dev) == MIGR_GEN_MIGR)
8430                 return 0;
8431
8432         migr_map = get_imsm_map(dev, MAP_1);
8433
8434         if (migr_map->map_state == IMSM_T_STATE_NORMAL &&
8435             dev->vol.migr_type != MIGR_GEN_MIGR)
8436                 return 1;
8437         else
8438                 return 0;
8439 }
8440
8441 /* return true if we recorded new information */
8442 static int mark_failure(struct intel_super *super,
8443                         struct imsm_dev *dev, struct imsm_disk *disk, int idx)
8444 {
8445         __u32 ord;
8446         int slot;
8447         struct imsm_map *map;
8448         char buf[MAX_RAID_SERIAL_LEN+3];
8449         unsigned int len, shift = 0;
8450
8451         /* new failures are always set in map[0] */
8452         map = get_imsm_map(dev, MAP_0);
8453
8454         slot = get_imsm_disk_slot(map, idx);
8455         if (slot < 0)
8456                 return 0;
8457
8458         ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
8459         if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
8460                 return 0;
8461
8462         memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN);
8463         buf[MAX_RAID_SERIAL_LEN] = '\000';
8464         strcat(buf, ":0");
8465         if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN)
8466                 shift = len - MAX_RAID_SERIAL_LEN + 1;
8467         memcpy(disk->serial, &buf[shift], len + 1 - shift);
8468
8469         disk->status |= FAILED_DISK;
8470         set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
8471         /* mark failures in second map if second map exists and this disk
8472          * in this slot.
8473          * This is valid for migration, initialization and rebuild
8474          */
8475         if (dev->vol.migr_state) {
8476                 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
8477                 int slot2 = get_imsm_disk_slot(map2, idx);
8478
8479                 if (slot2 < map2->num_members && slot2 >= 0)
8480                         set_imsm_ord_tbl_ent(map2, slot2,
8481                                              idx | IMSM_ORD_REBUILD);
8482         }
8483         if (map->failed_disk_num == 0xff ||
8484                 (!is_rebuilding(dev) && map->failed_disk_num > slot))
8485                 map->failed_disk_num = slot;
8486
8487         clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
8488
8489         return 1;
8490 }
8491
8492 static void mark_missing(struct intel_super *super,
8493                          struct imsm_dev *dev, struct imsm_disk *disk, int idx)
8494 {
8495         mark_failure(super, dev, disk, idx);
8496
8497         if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
8498                 return;
8499
8500         disk->scsi_id = __cpu_to_le32(~(__u32)0);
8501         memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
8502 }
8503
8504 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
8505 {
8506         struct dl *dl;
8507
8508         if (!super->missing)
8509                 return;
8510
8511         /* When orom adds replacement for missing disk it does
8512          * not remove entry of missing disk, but just updates map with
8513          * new added disk. So it is not enough just to test if there is
8514          * any missing disk, we have to look if there are any failed disks
8515          * in map to stop migration */
8516
8517         dprintf("imsm: mark missing\n");
8518         /* end process for initialization and rebuild only
8519          */
8520         if (is_gen_migration(dev) == false) {
8521                 int failed = imsm_count_failed(super, dev, MAP_0);
8522
8523                 if (failed) {
8524                         __u8 map_state;
8525                         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8526                         struct imsm_map *map1;
8527                         int i, ord, ord_map1;
8528                         int rebuilt = 1;
8529
8530                         for (i = 0; i < map->num_members; i++) {
8531                                 ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
8532                                 if (!(ord & IMSM_ORD_REBUILD))
8533                                         continue;
8534
8535                                 map1 = get_imsm_map(dev, MAP_1);
8536                                 if (!map1)
8537                                         continue;
8538
8539                                 ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
8540                                 if (ord_map1 & IMSM_ORD_REBUILD)
8541                                         rebuilt = 0;
8542                         }
8543
8544                         if (rebuilt) {
8545                                 map_state = imsm_check_degraded(super, dev,
8546                                                                 failed, MAP_0);
8547                                 end_migration(dev, super, map_state);
8548                         }
8549                 }
8550         }
8551         for (dl = super->missing; dl; dl = dl->next)
8552                 mark_missing(super, dev, &dl->disk, dl->index);
8553         super->updates_pending++;
8554 }
8555
8556 static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
8557                                               long long new_size)
8558 {
8559         unsigned long long array_blocks;
8560         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8561         int used_disks = imsm_num_data_members(map);
8562
8563         if (used_disks == 0) {
8564                 /* when problems occures
8565                  * return current array_blocks value
8566                  */
8567                 array_blocks = imsm_dev_size(dev);
8568
8569                 return array_blocks;
8570         }
8571
8572         /* set array size in metadata
8573          */
8574         if (new_size <= 0)
8575                 /* OLCE size change is caused by added disks
8576                  */
8577                 array_blocks = per_dev_array_size(map) * used_disks;
8578         else
8579                 /* Online Volume Size Change
8580                  * Using  available free space
8581                  */
8582                 array_blocks = new_size;
8583
8584         array_blocks = round_size_to_mb(array_blocks, used_disks);
8585         set_imsm_dev_size(dev, array_blocks);
8586
8587         return array_blocks;
8588 }
8589
8590 static void imsm_set_disk(struct active_array *a, int n, int state);
8591
8592 static void imsm_progress_container_reshape(struct intel_super *super)
8593 {
8594         /* if no device has a migr_state, but some device has a
8595          * different number of members than the previous device, start
8596          * changing the number of devices in this device to match
8597          * previous.
8598          */
8599         struct imsm_super *mpb = super->anchor;
8600         int prev_disks = -1;
8601         int i;
8602         int copy_map_size;
8603
8604         for (i = 0; i < mpb->num_raid_devs; i++) {
8605                 struct imsm_dev *dev = get_imsm_dev(super, i);
8606                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
8607                 struct imsm_map *map2;
8608                 int prev_num_members;
8609
8610                 if (dev->vol.migr_state)
8611                         return;
8612
8613                 if (prev_disks == -1)
8614                         prev_disks = map->num_members;
8615                 if (prev_disks == map->num_members)
8616                         continue;
8617
8618                 /* OK, this array needs to enter reshape mode.
8619                  * i.e it needs a migr_state
8620                  */
8621
8622                 copy_map_size = sizeof_imsm_map(map);
8623                 prev_num_members = map->num_members;
8624                 map->num_members = prev_disks;
8625                 dev->vol.migr_state = 1;
8626                 set_vol_curr_migr_unit(dev, 0);
8627                 set_migr_type(dev, MIGR_GEN_MIGR);
8628                 for (i = prev_num_members;
8629                      i < map->num_members; i++)
8630                         set_imsm_ord_tbl_ent(map, i, i);
8631                 map2 = get_imsm_map(dev, MAP_1);
8632                 /* Copy the current map */
8633                 memcpy(map2, map, copy_map_size);
8634                 map2->num_members = prev_num_members;
8635
8636                 imsm_set_array_size(dev, -1);
8637                 super->clean_migration_record_by_mdmon = 1;
8638                 super->updates_pending++;
8639         }
8640 }
8641
8642 /* Handle dirty -> clean transititions, resync and reshape.  Degraded and rebuild
8643  * states are handled in imsm_set_disk() with one exception, when a
8644  * resync is stopped due to a new failure this routine will set the
8645  * 'degraded' state for the array.
8646  */
8647 static int imsm_set_array_state(struct active_array *a, int consistent)
8648 {
8649         int inst = a->info.container_member;
8650         struct intel_super *super = a->container->sb;
8651         struct imsm_dev *dev = get_imsm_dev(super, inst);
8652         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8653         int failed = imsm_count_failed(super, dev, MAP_0);
8654         __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
8655         __u32 blocks_per_unit;
8656
8657         if (dev->vol.migr_state &&
8658             dev->vol.migr_type  == MIGR_GEN_MIGR) {
8659                 /* array state change is blocked due to reshape action
8660                  * We might need to
8661                  * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
8662                  * - finish the reshape (if last_checkpoint is big and action != reshape)
8663                  * - update vol_curr_migr_unit
8664                  */
8665                 if (a->curr_action == reshape) {
8666                         /* still reshaping, maybe update vol_curr_migr_unit */
8667                         goto mark_checkpoint;
8668                 } else {
8669                         if (a->last_checkpoint >= a->info.component_size) {
8670                                 unsigned long long array_blocks;
8671                                 int used_disks;
8672                                 struct mdinfo *mdi;
8673
8674                                 used_disks = imsm_num_data_members(map);
8675                                 if (used_disks > 0) {
8676                                         array_blocks =
8677                                                 per_dev_array_size(map) *
8678                                                 used_disks;
8679                                         array_blocks =
8680                                                 round_size_to_mb(array_blocks,
8681                                                                  used_disks);
8682                                         a->info.custom_array_size = array_blocks;
8683                                         /* encourage manager to update array
8684                                          * size
8685                                          */
8686
8687                                         a->check_reshape = 1;
8688                                 }
8689                                 /* finalize online capacity expansion/reshape */
8690                                 for (mdi = a->info.devs; mdi; mdi = mdi->next)
8691                                         imsm_set_disk(a,
8692                                                       mdi->disk.raid_disk,
8693                                                       mdi->curr_state);
8694
8695                                 imsm_progress_container_reshape(super);
8696                         }
8697                 }
8698         }
8699
8700         /* before we activate this array handle any missing disks */
8701         if (consistent == 2)
8702                 handle_missing(super, dev);
8703
8704         if (consistent == 2 &&
8705             (!is_resync_complete(&a->info) ||
8706              map_state != IMSM_T_STATE_NORMAL ||
8707              dev->vol.migr_state))
8708                 consistent = 0;
8709
8710         if (is_resync_complete(&a->info)) {
8711                 /* complete intialization / resync,
8712                  * recovery and interrupted recovery is completed in
8713                  * ->set_disk
8714                  */
8715                 if (is_resyncing(dev)) {
8716                         dprintf("imsm: mark resync done\n");
8717                         end_migration(dev, super, map_state);
8718                         super->updates_pending++;
8719                         a->last_checkpoint = 0;
8720                 }
8721         } else if ((!is_resyncing(dev) && !failed) &&
8722                    (imsm_reshape_blocks_arrays_changes(super) == 0)) {
8723                 /* mark the start of the init process if nothing is failed */
8724                 dprintf("imsm: mark resync start\n");
8725                 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
8726                         migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
8727                 else
8728                         migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
8729                 super->updates_pending++;
8730         }
8731
8732         if (a->prev_action == idle)
8733                 goto skip_mark_checkpoint;
8734
8735 mark_checkpoint:
8736         /* skip checkpointing for general migration,
8737          * it is controlled in mdadm
8738          */
8739         if (is_gen_migration(dev))
8740                 goto skip_mark_checkpoint;
8741
8742         /* check if we can update vol_curr_migr_unit from resync_start,
8743          * recovery_start
8744          */
8745         blocks_per_unit = blocks_per_migr_unit(super, dev);
8746         if (blocks_per_unit) {
8747                 set_vol_curr_migr_unit(dev,
8748                                        a->last_checkpoint / blocks_per_unit);
8749                 dprintf("imsm: mark checkpoint (%llu)\n",
8750                         vol_curr_migr_unit(dev));
8751                 super->updates_pending++;
8752         }
8753
8754 skip_mark_checkpoint:
8755         /* mark dirty / clean */
8756         if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) ||
8757             (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) {
8758                 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
8759                 if (consistent) {
8760                         dev->vol.dirty = RAIDVOL_CLEAN;
8761                 } else {
8762                         dev->vol.dirty = RAIDVOL_DIRTY;
8763                         if (dev->rwh_policy == RWH_DISTRIBUTED ||
8764                             dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
8765                                 dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
8766                 }
8767                 super->updates_pending++;
8768         }
8769
8770         return consistent;
8771 }
8772
8773 static int imsm_disk_slot_to_ord(struct active_array *a, int slot)
8774 {
8775         int inst = a->info.container_member;
8776         struct intel_super *super = a->container->sb;
8777         struct imsm_dev *dev = get_imsm_dev(super, inst);
8778         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8779
8780         if (slot > map->num_members) {
8781                 pr_err("imsm: imsm_disk_slot_to_ord %d out of range 0..%d\n",
8782                        slot, map->num_members - 1);
8783                 return -1;
8784         }
8785
8786         if (slot < 0)
8787                 return -1;
8788
8789         return get_imsm_ord_tbl_ent(dev, slot, MAP_0);
8790 }
8791
8792 static void imsm_set_disk(struct active_array *a, int n, int state)
8793 {
8794         int inst = a->info.container_member;
8795         struct intel_super *super = a->container->sb;
8796         struct imsm_dev *dev = get_imsm_dev(super, inst);
8797         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8798         struct imsm_disk *disk;
8799         struct mdinfo *mdi;
8800         int recovery_not_finished = 0;
8801         int failed;
8802         int ord;
8803         __u8 map_state;
8804         int rebuild_done = 0;
8805         int i;
8806
8807         ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
8808         if (ord < 0)
8809                 return;
8810
8811         dprintf("imsm: set_disk %d:%x\n", n, state);
8812         disk = get_imsm_disk(super, ord_to_idx(ord));
8813
8814         /* check for new failures */
8815         if (disk && (state & DS_FAULTY)) {
8816                 if (mark_failure(super, dev, disk, ord_to_idx(ord)))
8817                         super->updates_pending++;
8818         }
8819
8820         /* check if in_sync */
8821         if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
8822                 struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
8823
8824                 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
8825                 rebuild_done = 1;
8826                 super->updates_pending++;
8827         }
8828
8829         failed = imsm_count_failed(super, dev, MAP_0);
8830         map_state = imsm_check_degraded(super, dev, failed, MAP_0);
8831
8832         /* check if recovery complete, newly degraded, or failed */
8833         dprintf("imsm: Detected transition to state ");
8834         switch (map_state) {
8835         case IMSM_T_STATE_NORMAL: /* transition to normal state */
8836                 dprintf("normal: ");
8837                 if (is_rebuilding(dev)) {
8838                         dprintf_cont("while rebuilding");
8839                         /* check if recovery is really finished */
8840                         for (mdi = a->info.devs; mdi ; mdi = mdi->next)
8841                                 if (mdi->recovery_start != MaxSector) {
8842                                         recovery_not_finished = 1;
8843                                         break;
8844                                 }
8845                         if (recovery_not_finished) {
8846                                 dprintf_cont("\n");
8847                                 dprintf("Rebuild has not finished yet, state not changed");
8848                                 if (a->last_checkpoint < mdi->recovery_start) {
8849                                         a->last_checkpoint = mdi->recovery_start;
8850                                         super->updates_pending++;
8851                                 }
8852                                 break;
8853                         }
8854                         end_migration(dev, super, map_state);
8855                         map->failed_disk_num = ~0;
8856                         super->updates_pending++;
8857                         a->last_checkpoint = 0;
8858                         break;
8859                 }
8860                 if (is_gen_migration(dev)) {
8861                         dprintf_cont("while general migration");
8862                         if (a->last_checkpoint >= a->info.component_size)
8863                                 end_migration(dev, super, map_state);
8864                         else
8865                                 map->map_state = map_state;
8866                         map->failed_disk_num = ~0;
8867                         super->updates_pending++;
8868                         break;
8869                 }
8870         break;
8871         case IMSM_T_STATE_DEGRADED: /* transition to degraded state */
8872                 dprintf_cont("degraded: ");
8873                 if (map->map_state != map_state && !dev->vol.migr_state) {
8874                         dprintf_cont("mark degraded");
8875                         map->map_state = map_state;
8876                         super->updates_pending++;
8877                         a->last_checkpoint = 0;
8878                         break;
8879                 }
8880                 if (is_rebuilding(dev)) {
8881                         dprintf_cont("while rebuilding ");
8882                         if (state & DS_FAULTY)  {
8883                                 dprintf_cont("removing failed drive ");
8884                                 if (n == map->failed_disk_num) {
8885                                         dprintf_cont("end migration");
8886                                         end_migration(dev, super, map_state);
8887                                         a->last_checkpoint = 0;
8888                                 } else {
8889                                         dprintf_cont("fail detected during rebuild, changing map state");
8890                                         map->map_state = map_state;
8891                                 }
8892                                 super->updates_pending++;
8893                         }
8894
8895                         if (!rebuild_done)
8896                                 break;
8897
8898                         /* check if recovery is really finished */
8899                         for (mdi = a->info.devs; mdi ; mdi = mdi->next)
8900                                 if (mdi->recovery_start != MaxSector) {
8901                                         recovery_not_finished = 1;
8902                                         break;
8903                                 }
8904                         if (recovery_not_finished) {
8905                                 dprintf_cont("\n");
8906                                 dprintf_cont("Rebuild has not finished yet");
8907                                 if (a->last_checkpoint < mdi->recovery_start) {
8908                                         a->last_checkpoint =
8909                                                 mdi->recovery_start;
8910                                         super->updates_pending++;
8911                                 }
8912                                 break;
8913                         }
8914
8915                         dprintf_cont(" Rebuild done, still degraded");
8916                         end_migration(dev, super, map_state);
8917                         a->last_checkpoint = 0;
8918                         super->updates_pending++;
8919
8920                         for (i = 0; i < map->num_members; i++) {
8921                                 int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
8922
8923                                 if (idx & IMSM_ORD_REBUILD)
8924                                         map->failed_disk_num = i;
8925                         }
8926                         super->updates_pending++;
8927                         break;
8928                 }
8929                 if (is_gen_migration(dev)) {
8930                         dprintf_cont("while general migration");
8931                         if (a->last_checkpoint >= a->info.component_size)
8932                                 end_migration(dev, super, map_state);
8933                         else {
8934                                 map->map_state = map_state;
8935                                 manage_second_map(super, dev);
8936                         }
8937                         super->updates_pending++;
8938                         break;
8939                 }
8940                 if (is_initializing(dev)) {
8941                         dprintf_cont("while initialization.");
8942                         map->map_state = map_state;
8943                         super->updates_pending++;
8944                         break;
8945                 }
8946         break;
8947         case IMSM_T_STATE_FAILED: /* transition to failed state */
8948                 dprintf_cont("failed: ");
8949                 if (is_gen_migration(dev)) {
8950                         dprintf_cont("while general migration");
8951                         map->map_state = map_state;
8952                         super->updates_pending++;
8953                         break;
8954                 }
8955                 if (map->map_state != map_state) {
8956                         dprintf_cont("mark failed");
8957                         end_migration(dev, super, map_state);
8958                         super->updates_pending++;
8959                         a->last_checkpoint = 0;
8960                         break;
8961                 }
8962         break;
8963         default:
8964                 dprintf_cont("state %i\n", map_state);
8965         }
8966         dprintf_cont("\n");
8967 }
8968
8969 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
8970 {
8971         void *buf = mpb;
8972         __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
8973         unsigned long long dsize;
8974         unsigned long long sectors;
8975         unsigned int sector_size;
8976
8977         if (!get_dev_sector_size(fd, NULL, &sector_size))
8978                 return 1;
8979         get_dev_size(fd, NULL, &dsize);
8980
8981         if (mpb_size > sector_size) {
8982                 /* -1 to account for anchor */
8983                 sectors = mpb_sectors(mpb, sector_size) - 1;
8984
8985                 /* write the extended mpb to the sectors preceeding the anchor */
8986                 if (lseek64(fd, dsize - (sector_size * (2 + sectors)),
8987                    SEEK_SET) < 0)
8988                         return 1;
8989
8990                 if ((unsigned long long)write(fd, buf + sector_size,
8991                    sector_size * sectors) != sector_size * sectors)
8992                         return 1;
8993         }
8994
8995         /* first block is stored on second to last sector of the disk */
8996         if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0)
8997                 return 1;
8998
8999         if ((unsigned int)write(fd, buf, sector_size) != sector_size)
9000                 return 1;
9001
9002         return 0;
9003 }
9004
9005 static void imsm_sync_metadata(struct supertype *container)
9006 {
9007         struct intel_super *super = container->sb;
9008
9009         dprintf("sync metadata: %d\n", super->updates_pending);
9010         if (!super->updates_pending)
9011                 return;
9012
9013         write_super_imsm(container, 0);
9014
9015         super->updates_pending = 0;
9016 }
9017
9018 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
9019 {
9020         struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
9021         int i = get_imsm_disk_idx(dev, idx, MAP_X);
9022         struct dl *dl;
9023
9024         for (dl = super->disks; dl; dl = dl->next)
9025                 if (dl->index == i)
9026                         break;
9027
9028         if (dl && is_failed(&dl->disk))
9029                 dl = NULL;
9030
9031         if (dl)
9032                 dprintf("found %x:%x\n", dl->major, dl->minor);
9033
9034         return dl;
9035 }
9036
9037 static struct dl *imsm_add_spare(struct intel_super *super, int slot,
9038                                  struct active_array *a, int activate_new,
9039                                  struct mdinfo *additional_test_list)
9040 {
9041         struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
9042         int idx = get_imsm_disk_idx(dev, slot, MAP_X);
9043         struct imsm_super *mpb = super->anchor;
9044         struct imsm_map *map;
9045         unsigned long long pos;
9046         struct mdinfo *d;
9047         struct extent *ex;
9048         int i, j;
9049         int found;
9050         __u32 array_start = 0;
9051         __u32 array_end = 0;
9052         struct dl *dl;
9053         struct mdinfo *test_list;
9054
9055         for (dl = super->disks; dl; dl = dl->next) {
9056                 /* If in this array, skip */
9057                 for (d = a->info.devs ; d ; d = d->next)
9058                         if (is_fd_valid(d->state_fd) &&
9059                             d->disk.major == dl->major &&
9060                             d->disk.minor == dl->minor) {
9061                                 dprintf("%x:%x already in array\n",
9062                                         dl->major, dl->minor);
9063                                 break;
9064                         }
9065                 if (d)
9066                         continue;
9067                 test_list = additional_test_list;
9068                 while (test_list) {
9069                         if (test_list->disk.major == dl->major &&
9070                             test_list->disk.minor == dl->minor) {
9071                                 dprintf("%x:%x already in additional test list\n",
9072                                         dl->major, dl->minor);
9073                                 break;
9074                         }
9075                         test_list = test_list->next;
9076                 }
9077                 if (test_list)
9078                         continue;
9079
9080                 /* skip in use or failed drives */
9081                 if (is_failed(&dl->disk) || idx == dl->index ||
9082                     dl->index == -2) {
9083                         dprintf("%x:%x status (failed: %d index: %d)\n",
9084                                 dl->major, dl->minor, is_failed(&dl->disk), idx);
9085                         continue;
9086                 }
9087
9088                 /* skip pure spares when we are looking for partially
9089                  * assimilated drives
9090                  */
9091                 if (dl->index == -1 && !activate_new)
9092                         continue;
9093
9094                 if (!drive_validate_sector_size(super, dl))
9095                         continue;
9096
9097                 /* Does this unused device have the requisite free space?
9098                  * It needs to be able to cover all member volumes
9099                  */
9100                 ex = get_extents(super, dl, 1);
9101                 if (!ex) {
9102                         dprintf("cannot get extents\n");
9103                         continue;
9104                 }
9105                 for (i = 0; i < mpb->num_raid_devs; i++) {
9106                         dev = get_imsm_dev(super, i);
9107                         map = get_imsm_map(dev, MAP_0);
9108
9109                         /* check if this disk is already a member of
9110                          * this array
9111                          */
9112                         if (get_imsm_disk_slot(map, dl->index) >= 0)
9113                                 continue;
9114
9115                         found = 0;
9116                         j = 0;
9117                         pos = 0;
9118                         array_start = pba_of_lba0(map);
9119                         array_end = array_start +
9120                                     per_dev_array_size(map) - 1;
9121
9122                         do {
9123                                 /* check that we can start at pba_of_lba0 with
9124                                  * num_data_stripes*blocks_per_stripe of space
9125                                  */
9126                                 if (array_start >= pos && array_end < ex[j].start) {
9127                                         found = 1;
9128                                         break;
9129                                 }
9130                                 pos = ex[j].start + ex[j].size;
9131                                 j++;
9132                         } while (ex[j-1].size);
9133
9134                         if (!found)
9135                                 break;
9136                 }
9137
9138                 free(ex);
9139                 if (i < mpb->num_raid_devs) {
9140                         dprintf("%x:%x does not have %u to %u available\n",
9141                                 dl->major, dl->minor, array_start, array_end);
9142                         /* No room */
9143                         continue;
9144                 }
9145                 return dl;
9146         }
9147
9148         return dl;
9149 }
9150
9151 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
9152 {
9153         struct imsm_dev *dev2;
9154         struct imsm_map *map;
9155         struct dl *idisk;
9156         int slot;
9157         int idx;
9158         __u8 state;
9159
9160         dev2 = get_imsm_dev(cont->sb, dev_idx);
9161
9162         state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
9163         if (state == IMSM_T_STATE_FAILED) {
9164                 map = get_imsm_map(dev2, MAP_0);
9165                 for (slot = 0; slot < map->num_members; slot++) {
9166                         /*
9167                          * Check if failed disks are deleted from intel
9168                          * disk list or are marked to be deleted
9169                          */
9170                         idx = get_imsm_disk_idx(dev2, slot, MAP_X);
9171                         idisk = get_imsm_dl_disk(cont->sb, idx);
9172                         /*
9173                          * Do not rebuild the array if failed disks
9174                          * from failed sub-array are not removed from
9175                          * container.
9176                          */
9177                         if (idisk &&
9178                             is_failed(&idisk->disk) &&
9179                             (idisk->action != DISK_REMOVE))
9180                                 return 0;
9181                 }
9182         }
9183         return 1;
9184 }
9185
9186 static struct mdinfo *imsm_activate_spare(struct active_array *a,
9187                                           struct metadata_update **updates)
9188 {
9189         /**
9190          * Find a device with unused free space and use it to replace a
9191          * failed/vacant region in an array.  We replace failed regions one a
9192          * array at a time.  The result is that a new spare disk will be added
9193          * to the first failed array and after the monitor has finished
9194          * propagating failures the remainder will be consumed.
9195          *
9196          * FIXME add a capability for mdmon to request spares from another
9197          * container.
9198          */
9199
9200         struct intel_super *super = a->container->sb;
9201         int inst = a->info.container_member;
9202         struct imsm_dev *dev = get_imsm_dev(super, inst);
9203         struct imsm_map *map = get_imsm_map(dev, MAP_0);
9204         int failed = a->info.array.raid_disks;
9205         struct mdinfo *rv = NULL;
9206         struct mdinfo *d;
9207         struct mdinfo *di;
9208         struct metadata_update *mu;
9209         struct dl *dl;
9210         struct imsm_update_activate_spare *u;
9211         int num_spares = 0;
9212         int i;
9213         int allowed;
9214
9215         for (d = a->info.devs ; d; d = d->next) {
9216                 if (!is_fd_valid(d->state_fd))
9217                         continue;
9218
9219                 if (d->curr_state & DS_FAULTY)
9220                         /* wait for Removal to happen */
9221                         return NULL;
9222
9223                 failed--;
9224         }
9225
9226         dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
9227                 inst, failed, a->info.array.raid_disks, a->info.array.level);
9228
9229         if (imsm_reshape_blocks_arrays_changes(super))
9230                         return NULL;
9231
9232         /* Cannot activate another spare if rebuild is in progress already
9233          */
9234         if (is_rebuilding(dev)) {
9235                 dprintf("imsm: No spare activation allowed. Rebuild in progress already.\n");
9236                 return NULL;
9237         }
9238
9239         if (a->info.array.level == 4)
9240                 /* No repair for takeovered array
9241                  * imsm doesn't support raid4
9242                  */
9243                 return NULL;
9244
9245         if (imsm_check_degraded(super, dev, failed, MAP_0) !=
9246                         IMSM_T_STATE_DEGRADED)
9247                 return NULL;
9248
9249         if (get_imsm_map(dev, MAP_0)->map_state == IMSM_T_STATE_UNINITIALIZED) {
9250                 dprintf("imsm: No spare activation allowed. Volume is not initialized.\n");
9251                 return NULL;
9252         }
9253
9254         /*
9255          * If there are any failed disks check state of the other volume.
9256          * Block rebuild if the another one is failed until failed disks
9257          * are removed from container.
9258          */
9259         if (failed) {
9260                 dprintf("found failed disks in %.*s, check if there anotherfailed sub-array.\n",
9261                         MAX_RAID_SERIAL_LEN, dev->volume);
9262                 /* check if states of the other volumes allow for rebuild */
9263                 for (i = 0; i <  super->anchor->num_raid_devs; i++) {
9264                         if (i != inst) {
9265                                 allowed = imsm_rebuild_allowed(a->container,
9266                                                                i, failed);
9267                                 if (!allowed)
9268                                         return NULL;
9269                         }
9270                 }
9271         }
9272
9273         /* For each slot, if it is not working, find a spare */
9274         for (i = 0; i < a->info.array.raid_disks; i++) {
9275                 for (d = a->info.devs ; d ; d = d->next)
9276                         if (d->disk.raid_disk == i)
9277                                 break;
9278                 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
9279                 if (d && is_fd_valid(d->state_fd))
9280                         continue;
9281
9282                 /*
9283                  * OK, this device needs recovery.  Try to re-add the
9284                  * previous occupant of this slot, if this fails see if
9285                  * we can continue the assimilation of a spare that was
9286                  * partially assimilated, finally try to activate a new
9287                  * spare.
9288                  */
9289                 dl = imsm_readd(super, i, a);
9290                 if (!dl)
9291                         dl = imsm_add_spare(super, i, a, 0, rv);
9292                 if (!dl)
9293                         dl = imsm_add_spare(super, i, a, 1, rv);
9294                 if (!dl)
9295                         continue;
9296
9297                 /* found a usable disk with enough space */
9298                 di = xcalloc(1, sizeof(*di));
9299
9300                 /* dl->index will be -1 in the case we are activating a
9301                  * pristine spare.  imsm_process_update() will create a
9302                  * new index in this case.  Once a disk is found to be
9303                  * failed in all member arrays it is kicked from the
9304                  * metadata
9305                  */
9306                 di->disk.number = dl->index;
9307
9308                 /* (ab)use di->devs to store a pointer to the device
9309                  * we chose
9310                  */
9311                 di->devs = (struct mdinfo *) dl;
9312
9313                 di->disk.raid_disk = i;
9314                 di->disk.major = dl->major;
9315                 di->disk.minor = dl->minor;
9316                 di->disk.state = 0;
9317                 di->recovery_start = 0;
9318                 di->data_offset = pba_of_lba0(map);
9319                 di->component_size = a->info.component_size;
9320                 di->container_member = inst;
9321                 di->bb.supported = 1;
9322                 if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) {
9323                         di->ppl_sector = get_ppl_sector(super, inst);
9324                         di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
9325                 }
9326                 super->random = random32();
9327                 di->next = rv;
9328                 rv = di;
9329                 num_spares++;
9330                 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
9331                         i, di->data_offset);
9332         }
9333
9334         if (!rv)
9335                 /* No spares found */
9336                 return rv;
9337         /* Now 'rv' has a list of devices to return.
9338          * Create a metadata_update record to update the
9339          * disk_ord_tbl for the array
9340          */
9341         mu = xmalloc(sizeof(*mu));
9342         mu->buf = xcalloc(num_spares,
9343                           sizeof(struct imsm_update_activate_spare));
9344         mu->space = NULL;
9345         mu->space_list = NULL;
9346         mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
9347         mu->next = *updates;
9348         u = (struct imsm_update_activate_spare *) mu->buf;
9349
9350         for (di = rv ; di ; di = di->next) {
9351                 u->type = update_activate_spare;
9352                 u->dl = (struct dl *) di->devs;
9353                 di->devs = NULL;
9354                 u->slot = di->disk.raid_disk;
9355                 u->array = inst;
9356                 u->next = u + 1;
9357                 u++;
9358         }
9359         (u-1)->next = NULL;
9360         *updates = mu;
9361
9362         return rv;
9363 }
9364
9365 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
9366 {
9367         struct imsm_dev *dev = get_imsm_dev(super, idx);
9368         struct imsm_map *map = get_imsm_map(dev, MAP_0);
9369         struct imsm_map *new_map = get_imsm_map(&u->dev, MAP_0);
9370         struct disk_info *inf = get_disk_info(u);
9371         struct imsm_disk *disk;
9372         int i;
9373         int j;
9374
9375         for (i = 0; i < map->num_members; i++) {
9376                 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, MAP_X));
9377                 for (j = 0; j < new_map->num_members; j++)
9378                         if (serialcmp(disk->serial, inf[j].serial) == 0)
9379                                 return 1;
9380         }
9381
9382         return 0;
9383 }
9384
9385 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
9386 {
9387         struct dl *dl;
9388
9389         for (dl = super->disks; dl; dl = dl->next)
9390                 if (dl->major == major &&  dl->minor == minor)
9391                         return dl;
9392         return NULL;
9393 }
9394
9395 static int remove_disk_super(struct intel_super *super, int major, int minor)
9396 {
9397         struct dl *prev;
9398         struct dl *dl;
9399
9400         prev = NULL;
9401         for (dl = super->disks; dl; dl = dl->next) {
9402                 if (dl->major == major && dl->minor == minor) {
9403                         /* remove */
9404                         if (prev)
9405                                 prev->next = dl->next;
9406                         else
9407                                 super->disks = dl->next;
9408                         dl->next = NULL;
9409                         __free_imsm_disk(dl, 1);
9410                         dprintf("removed %x:%x\n", major, minor);
9411                         break;
9412                 }
9413                 prev = dl;
9414         }
9415         return 0;
9416 }
9417
9418 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
9419
9420 static int add_remove_disk_update(struct intel_super *super)
9421 {
9422         int check_degraded = 0;
9423         struct dl *disk;
9424
9425         /* add/remove some spares to/from the metadata/contrainer */
9426         while (super->disk_mgmt_list) {
9427                 struct dl *disk_cfg;
9428
9429                 disk_cfg = super->disk_mgmt_list;
9430                 super->disk_mgmt_list = disk_cfg->next;
9431                 disk_cfg->next = NULL;
9432
9433                 if (disk_cfg->action == DISK_ADD) {
9434                         disk_cfg->next = super->disks;
9435                         super->disks = disk_cfg;
9436                         check_degraded = 1;
9437                         dprintf("added %x:%x\n",
9438                                 disk_cfg->major, disk_cfg->minor);
9439                 } else if (disk_cfg->action == DISK_REMOVE) {
9440                         dprintf("Disk remove action processed: %x.%x\n",
9441                                 disk_cfg->major, disk_cfg->minor);
9442                         disk = get_disk_super(super,
9443                                               disk_cfg->major,
9444                                               disk_cfg->minor);
9445                         if (disk) {
9446                                 /* store action status */
9447                                 disk->action = DISK_REMOVE;
9448                                 /* remove spare disks only */
9449                                 if (disk->index == -1) {
9450                                         remove_disk_super(super,
9451                                                           disk_cfg->major,
9452                                                           disk_cfg->minor);
9453                                 } else {
9454                                         disk_cfg->fd = disk->fd;
9455                                         disk->fd = -1;
9456                                 }
9457                         }
9458                         /* release allocate disk structure */
9459                         __free_imsm_disk(disk_cfg, 1);
9460                 }
9461         }
9462         return check_degraded;
9463 }
9464
9465 static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
9466                                                 struct intel_super *super,
9467                                                 void ***space_list)
9468 {
9469         struct intel_dev *id;
9470         void **tofree = NULL;
9471         int ret_val = 0;
9472
9473         dprintf("(enter)\n");
9474         if (u->subdev < 0 || u->subdev > 1) {
9475                 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
9476                 return ret_val;
9477         }
9478         if (space_list == NULL || *space_list == NULL) {
9479                 dprintf("imsm: Error: Memory is not allocated\n");
9480                 return ret_val;
9481         }
9482
9483         for (id = super->devlist ; id; id = id->next) {
9484                 if (id->index == (unsigned)u->subdev) {
9485                         struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
9486                         struct imsm_map *map;
9487                         struct imsm_dev *new_dev =
9488                                 (struct imsm_dev *)*space_list;
9489                         struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
9490                         int to_state;
9491                         struct dl *new_disk;
9492
9493                         if (new_dev == NULL)
9494                                 return ret_val;
9495                         *space_list = **space_list;
9496                         memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
9497                         map = get_imsm_map(new_dev, MAP_0);
9498                         if (migr_map) {
9499                                 dprintf("imsm: Error: migration in progress");
9500                                 return ret_val;
9501                         }
9502
9503                         to_state = map->map_state;
9504                         if ((u->new_level == 5) && (map->raid_level == 0)) {
9505                                 map->num_members++;
9506                                 /* this should not happen */
9507                                 if (u->new_disks[0] < 0) {
9508                                         map->failed_disk_num =
9509                                                 map->num_members - 1;
9510                                         to_state = IMSM_T_STATE_DEGRADED;
9511                                 } else
9512                                         to_state = IMSM_T_STATE_NORMAL;
9513                         }
9514                         migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
9515                         if (u->new_level > -1)
9516                                 map->raid_level = u->new_level;
9517                         migr_map = get_imsm_map(new_dev, MAP_1);
9518                         if ((u->new_level == 5) &&
9519                             (migr_map->raid_level == 0)) {
9520                                 int ord = map->num_members - 1;
9521                                 migr_map->num_members--;
9522                                 if (u->new_disks[0] < 0)
9523                                         ord |= IMSM_ORD_REBUILD;
9524                                 set_imsm_ord_tbl_ent(map,
9525                                                      map->num_members - 1,
9526                                                      ord);
9527                         }
9528                         id->dev = new_dev;
9529                         tofree = (void **)dev;
9530
9531                         /* update chunk size
9532                          */
9533                         if (u->new_chunksize > 0) {
9534                                 struct imsm_map *dest_map =
9535                                         get_imsm_map(dev, MAP_0);
9536                                 int used_disks =
9537                                         imsm_num_data_members(dest_map);
9538
9539                                 if (used_disks == 0)
9540                                         return ret_val;
9541
9542                                 map->blocks_per_strip =
9543                                         __cpu_to_le16(u->new_chunksize * 2);
9544                                 update_num_data_stripes(map, imsm_dev_size(dev));
9545                         }
9546
9547                         /* ensure blocks_per_member has valid value
9548                          */
9549                         set_blocks_per_member(map,
9550                                               per_dev_array_size(map) +
9551                                               NUM_BLOCKS_DIRTY_STRIPE_REGION);
9552
9553                         /* add disk
9554                          */
9555                         if (u->new_level != 5 || migr_map->raid_level != 0 ||
9556                             migr_map->raid_level == map->raid_level)
9557                                 goto skip_disk_add;
9558
9559                         if (u->new_disks[0] >= 0) {
9560                                 /* use passes spare
9561                                  */
9562                                 new_disk = get_disk_super(super,
9563                                                         major(u->new_disks[0]),
9564                                                         minor(u->new_disks[0]));
9565                                 dprintf("imsm: new disk for reshape is: %i:%i (%p, index = %i)\n",
9566                                         major(u->new_disks[0]),
9567                                         minor(u->new_disks[0]),
9568                                         new_disk, new_disk->index);
9569                                 if (new_disk == NULL)
9570                                         goto error_disk_add;
9571
9572                                 new_disk->index = map->num_members - 1;
9573                                 /* slot to fill in autolayout
9574                                  */
9575                                 new_disk->raiddisk = new_disk->index;
9576                                 new_disk->disk.status |= CONFIGURED_DISK;
9577                                 new_disk->disk.status &= ~SPARE_DISK;
9578                         } else
9579                                 goto error_disk_add;
9580
9581 skip_disk_add:
9582                         *tofree = *space_list;
9583                         /* calculate new size
9584                          */
9585                         imsm_set_array_size(new_dev, -1);
9586
9587                         ret_val = 1;
9588                 }
9589         }
9590
9591         if (tofree)
9592                 *space_list = tofree;
9593         return ret_val;
9594
9595 error_disk_add:
9596         dprintf("Error: imsm: Cannot find disk.\n");
9597         return ret_val;
9598 }
9599
9600 static int apply_size_change_update(struct imsm_update_size_change *u,
9601                 struct intel_super *super)
9602 {
9603         struct intel_dev *id;
9604         int ret_val = 0;
9605
9606         dprintf("(enter)\n");
9607         if (u->subdev < 0 || u->subdev > 1) {
9608                 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
9609                 return ret_val;
9610         }
9611
9612         for (id = super->devlist ; id; id = id->next) {
9613                 if (id->index == (unsigned)u->subdev) {
9614                         struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
9615                         struct imsm_map *map = get_imsm_map(dev, MAP_0);
9616                         int used_disks = imsm_num_data_members(map);
9617                         unsigned long long blocks_per_member;
9618                         unsigned long long new_size_per_disk;
9619
9620                         if (used_disks == 0)
9621                                 return 0;
9622
9623                         /* calculate new size
9624                          */
9625                         new_size_per_disk = u->new_size / used_disks;
9626                         blocks_per_member = new_size_per_disk +
9627                                             NUM_BLOCKS_DIRTY_STRIPE_REGION;
9628
9629                         imsm_set_array_size(dev, u->new_size);
9630                         set_blocks_per_member(map, blocks_per_member);
9631                         update_num_data_stripes(map, u->new_size);
9632                         ret_val = 1;
9633                         break;
9634                 }
9635         }
9636
9637         return ret_val;
9638 }
9639
9640 static int prepare_spare_to_activate(struct supertype *st,
9641                                      struct imsm_update_activate_spare *u)
9642 {
9643         struct intel_super *super = st->sb;
9644         int prev_current_vol = super->current_vol;
9645         struct active_array *a;
9646         int ret = 1;
9647
9648         for (a = st->arrays; a; a = a->next)
9649                 /*
9650                  * Additional initialization (adding bitmap header, filling
9651                  * the bitmap area with '1's to force initial rebuild for a whole
9652                  * data-area) is required when adding the spare to the volume
9653                  * with write-intent bitmap.
9654                  */
9655                 if (a->info.container_member == u->array &&
9656                     a->info.consistency_policy == CONSISTENCY_POLICY_BITMAP) {
9657                         struct dl *dl;
9658
9659                         for (dl = super->disks; dl; dl = dl->next)
9660                                 if (dl == u->dl)
9661                                         break;
9662                         if (!dl)
9663                                 break;
9664
9665                         super->current_vol = u->array;
9666                         if (st->ss->write_bitmap(st, dl->fd, NoUpdate))
9667                                 ret = 0;
9668                         super->current_vol = prev_current_vol;
9669                 }
9670         return ret;
9671 }
9672
9673 static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
9674                                        struct intel_super *super,
9675                                        struct active_array *active_array)
9676 {
9677         struct imsm_super *mpb = super->anchor;
9678         struct imsm_dev *dev = get_imsm_dev(super, u->array);
9679         struct imsm_map *map = get_imsm_map(dev, MAP_0);
9680         struct imsm_map *migr_map;
9681         struct active_array *a;
9682         struct imsm_disk *disk;
9683         __u8 to_state;
9684         struct dl *dl;
9685         unsigned int found;
9686         int failed;
9687         int victim;
9688         int i;
9689         int second_map_created = 0;
9690
9691         for (; u; u = u->next) {
9692                 victim = get_imsm_disk_idx(dev, u->slot, MAP_X);
9693
9694                 if (victim < 0)
9695                         return 0;
9696
9697                 for (dl = super->disks; dl; dl = dl->next)
9698                         if (dl == u->dl)
9699                                 break;
9700
9701                 if (!dl) {
9702                         pr_err("error: imsm_activate_spare passed an unknown disk (index: %d)\n",
9703                                 u->dl->index);
9704                         return 0;
9705                 }
9706
9707                 /* count failures (excluding rebuilds and the victim)
9708                  * to determine map[0] state
9709                  */
9710                 failed = 0;
9711                 for (i = 0; i < map->num_members; i++) {
9712                         if (i == u->slot)
9713                                 continue;
9714                         disk = get_imsm_disk(super,
9715                                              get_imsm_disk_idx(dev, i, MAP_X));
9716                         if (!disk || is_failed(disk))
9717                                 failed++;
9718                 }
9719
9720                 /* adding a pristine spare, assign a new index */
9721                 if (dl->index < 0) {
9722                         dl->index = super->anchor->num_disks;
9723                         super->anchor->num_disks++;
9724                 }
9725                 disk = &dl->disk;
9726                 disk->status |= CONFIGURED_DISK;
9727                 disk->status &= ~SPARE_DISK;
9728
9729                 /* mark rebuild */
9730                 to_state = imsm_check_degraded(super, dev, failed, MAP_0);
9731                 if (!second_map_created) {
9732                         second_map_created = 1;
9733                         map->map_state = IMSM_T_STATE_DEGRADED;
9734                         migrate(dev, super, to_state, MIGR_REBUILD);
9735                 } else
9736                         map->map_state = to_state;
9737                 migr_map = get_imsm_map(dev, MAP_1);
9738                 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
9739                 set_imsm_ord_tbl_ent(migr_map, u->slot,
9740                                      dl->index | IMSM_ORD_REBUILD);
9741
9742                 /* update the family_num to mark a new container
9743                  * generation, being careful to record the existing
9744                  * family_num in orig_family_num to clean up after
9745                  * earlier mdadm versions that neglected to set it.
9746                  */
9747                 if (mpb->orig_family_num == 0)
9748                         mpb->orig_family_num = mpb->family_num;
9749                 mpb->family_num += super->random;
9750
9751                 /* count arrays using the victim in the metadata */
9752                 found = 0;
9753                 for (a = active_array; a ; a = a->next) {
9754                         int dev_idx = a->info.container_member;
9755
9756                         if (get_disk_slot_in_dev(super, dev_idx, victim) >= 0)
9757                                 found++;
9758                 }
9759
9760                 /* delete the victim if it is no longer being
9761                  * utilized anywhere
9762                  */
9763                 if (!found) {
9764                         struct dl **dlp;
9765
9766                         /* We know that 'manager' isn't touching anything,
9767                          * so it is safe to delete
9768                          */
9769                         for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
9770                                 if ((*dlp)->index == victim)
9771                                         break;
9772
9773                         /* victim may be on the missing list */
9774                         if (!*dlp)
9775                                 for (dlp = &super->missing; *dlp;
9776                                      dlp = &(*dlp)->next)
9777                                         if ((*dlp)->index == victim)
9778                                                 break;
9779                         imsm_delete(super, dlp, victim);
9780                 }
9781         }
9782
9783         return 1;
9784 }
9785
9786 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
9787                                                 struct intel_super *super,
9788                                                 void ***space_list)
9789 {
9790         struct dl *new_disk;
9791         struct intel_dev *id;
9792         int i;
9793         int delta_disks = u->new_raid_disks - u->old_raid_disks;
9794         int disk_count = u->old_raid_disks;
9795         void **tofree = NULL;
9796         int devices_to_reshape = 1;
9797         struct imsm_super *mpb = super->anchor;
9798         int ret_val = 0;
9799         unsigned int dev_id;
9800
9801         dprintf("(enter)\n");
9802
9803         /* enable spares to use in array */
9804         for (i = 0; i < delta_disks; i++) {
9805                 new_disk = get_disk_super(super,
9806                                           major(u->new_disks[i]),
9807                                           minor(u->new_disks[i]));
9808                 dprintf("imsm: new disk for reshape is: %i:%i (%p, index = %i)\n",
9809                         major(u->new_disks[i]), minor(u->new_disks[i]),
9810                         new_disk, new_disk->index);
9811                 if (new_disk == NULL ||
9812                     (new_disk->index >= 0 &&
9813                      new_disk->index < u->old_raid_disks))
9814                         goto update_reshape_exit;
9815                 new_disk->index = disk_count++;
9816                 /* slot to fill in autolayout
9817                  */
9818                 new_disk->raiddisk = new_disk->index;
9819                 new_disk->disk.status |=
9820                         CONFIGURED_DISK;
9821                 new_disk->disk.status &= ~SPARE_DISK;
9822         }
9823
9824         dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
9825                 mpb->num_raid_devs);
9826         /* manage changes in volume
9827          */
9828         for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
9829                 void **sp = *space_list;
9830                 struct imsm_dev *newdev;
9831                 struct imsm_map *newmap, *oldmap;
9832
9833                 for (id = super->devlist ; id; id = id->next) {
9834                         if (id->index == dev_id)
9835                                 break;
9836                 }
9837                 if (id == NULL)
9838                         break;
9839                 if (!sp)
9840                         continue;
9841                 *space_list = *sp;
9842                 newdev = (void*)sp;
9843                 /* Copy the dev, but not (all of) the map */
9844                 memcpy(newdev, id->dev, sizeof(*newdev));
9845                 oldmap = get_imsm_map(id->dev, MAP_0);
9846                 newmap = get_imsm_map(newdev, MAP_0);
9847                 /* Copy the current map */
9848                 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
9849                 /* update one device only
9850                  */
9851                 if (devices_to_reshape) {
9852                         dprintf("imsm: modifying subdev: %i\n",
9853                                 id->index);
9854                         devices_to_reshape--;
9855                         newdev->vol.migr_state = 1;
9856                         set_vol_curr_migr_unit(newdev, 0);
9857                         set_migr_type(newdev, MIGR_GEN_MIGR);
9858                         newmap->num_members = u->new_raid_disks;
9859                         for (i = 0; i < delta_disks; i++) {
9860                                 set_imsm_ord_tbl_ent(newmap,
9861                                                      u->old_raid_disks + i,
9862                                                      u->old_raid_disks + i);
9863                         }
9864                         /* New map is correct, now need to save old map
9865                          */
9866                         newmap = get_imsm_map(newdev, MAP_1);
9867                         memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
9868
9869                         imsm_set_array_size(newdev, -1);
9870                 }
9871
9872                 sp = (void **)id->dev;
9873                 id->dev = newdev;
9874                 *sp = tofree;
9875                 tofree = sp;
9876
9877                 /* Clear migration record */
9878                 memset(super->migr_rec, 0, sizeof(struct migr_record));
9879         }
9880         if (tofree)
9881                 *space_list = tofree;
9882         ret_val = 1;
9883
9884 update_reshape_exit:
9885
9886         return ret_val;
9887 }
9888
9889 static int apply_takeover_update(struct imsm_update_takeover *u,
9890                                  struct intel_super *super,
9891                                  void ***space_list)
9892 {
9893         struct imsm_dev *dev = NULL;
9894         struct intel_dev *dv;
9895         struct imsm_dev *dev_new;
9896         struct imsm_map *map;
9897         struct dl *dm, *du;
9898         int i;
9899
9900         for (dv = super->devlist; dv; dv = dv->next)
9901                 if (dv->index == (unsigned int)u->subarray) {
9902                         dev = dv->dev;
9903                         break;
9904                 }
9905
9906         if (dev == NULL)
9907                 return 0;
9908
9909         map = get_imsm_map(dev, MAP_0);
9910
9911         if (u->direction == R10_TO_R0) {
9912                 /* Number of failed disks must be half of initial disk number */
9913                 if (imsm_count_failed(super, dev, MAP_0) !=
9914                                 (map->num_members / 2))
9915                         return 0;
9916
9917                 /* iterate through devices to mark removed disks as spare */
9918                 for (dm = super->disks; dm; dm = dm->next) {
9919                         if (dm->disk.status & FAILED_DISK) {
9920                                 int idx = dm->index;
9921                                 /* update indexes on the disk list */
9922 /* FIXME this loop-with-the-loop looks wrong,  I'm not convinced
9923    the index values will end up being correct.... NB */
9924                                 for (du = super->disks; du; du = du->next)
9925                                         if (du->index > idx)
9926                                                 du->index--;
9927                                 /* mark as spare disk */
9928                                 mark_spare(dm);
9929                         }
9930                 }
9931                 /* update map */
9932                 map->num_members /= map->num_domains;
9933                 map->map_state = IMSM_T_STATE_NORMAL;
9934                 map->raid_level = 0;
9935                 set_num_domains(map);
9936                 update_num_data_stripes(map, imsm_dev_size(dev));
9937                 map->failed_disk_num = -1;
9938         }
9939
9940         if (u->direction == R0_TO_R10) {
9941                 void **space;
9942
9943                 /* update slots in current disk list */
9944                 for (dm = super->disks; dm; dm = dm->next) {
9945                         if (dm->index >= 0)
9946                                 dm->index *= 2;
9947                 }
9948                 /* create new *missing* disks */
9949                 for (i = 0; i < map->num_members; i++) {
9950                         space = *space_list;
9951                         if (!space)
9952                                 continue;
9953                         *space_list = *space;
9954                         du = (void *)space;
9955                         memcpy(du, super->disks, sizeof(*du));
9956                         du->fd = -1;
9957                         du->minor = 0;
9958                         du->major = 0;
9959                         du->index = (i * 2) + 1;
9960                         sprintf((char *)du->disk.serial,
9961                                 " MISSING_%d", du->index);
9962                         sprintf((char *)du->serial,
9963                                 "MISSING_%d", du->index);
9964                         du->next = super->missing;
9965                         super->missing = du;
9966                 }
9967                 /* create new dev and map */
9968                 space = *space_list;
9969                 if (!space)
9970                         return 0;
9971                 *space_list = *space;
9972                 dev_new = (void *)space;
9973                 memcpy(dev_new, dev, sizeof(*dev));
9974                 /* update new map */
9975                 map = get_imsm_map(dev_new, MAP_0);
9976
9977                 map->map_state = IMSM_T_STATE_DEGRADED;
9978                 map->raid_level = 1;
9979                 set_num_domains(map);
9980                 map->num_members = map->num_members * map->num_domains;
9981                 update_num_data_stripes(map, imsm_dev_size(dev));
9982
9983                 /* replace dev<->dev_new */
9984                 dv->dev = dev_new;
9985         }
9986         /* update disk order table */
9987         for (du = super->disks; du; du = du->next)
9988                 if (du->index >= 0)
9989                         set_imsm_ord_tbl_ent(map, du->index, du->index);
9990         for (du = super->missing; du; du = du->next)
9991                 if (du->index >= 0) {
9992                         set_imsm_ord_tbl_ent(map, du->index, du->index);
9993                         mark_missing(super, dv->dev, &du->disk, du->index);
9994                 }
9995
9996         return 1;
9997 }
9998
9999 static void imsm_process_update(struct supertype *st,
10000                                 struct metadata_update *update)
10001 {
10002         /**
10003          * crack open the metadata_update envelope to find the update record
10004          * update can be one of:
10005          *    update_reshape_container_disks - all the arrays in the container
10006          *      are being reshaped to have more devices.  We need to mark
10007          *      the arrays for general migration and convert selected spares
10008          *      into active devices.
10009          *    update_activate_spare - a spare device has replaced a failed
10010          *      device in an array, update the disk_ord_tbl.  If this disk is
10011          *      present in all member arrays then also clear the SPARE_DISK
10012          *      flag
10013          *    update_create_array
10014          *    update_kill_array
10015          *    update_rename_array
10016          *    update_add_remove_disk
10017          */
10018         struct intel_super *super = st->sb;
10019         struct imsm_super *mpb;
10020         enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
10021
10022         /* update requires a larger buf but the allocation failed */
10023         if (super->next_len && !super->next_buf) {
10024                 super->next_len = 0;
10025                 return;
10026         }
10027
10028         if (super->next_buf) {
10029                 memcpy(super->next_buf, super->buf, super->len);
10030                 free(super->buf);
10031                 super->len = super->next_len;
10032                 super->buf = super->next_buf;
10033
10034                 super->next_len = 0;
10035                 super->next_buf = NULL;
10036         }
10037
10038         mpb = super->anchor;
10039
10040         switch (type) {
10041         case update_general_migration_checkpoint: {
10042                 struct intel_dev *id;
10043                 struct imsm_update_general_migration_checkpoint *u =
10044                                                         (void *)update->buf;
10045
10046                 dprintf("called for update_general_migration_checkpoint\n");
10047
10048                 /* find device under general migration */
10049                 for (id = super->devlist ; id; id = id->next) {
10050                         if (is_gen_migration(id->dev)) {
10051                                 set_vol_curr_migr_unit(id->dev,
10052                                                    u->curr_migr_unit);
10053                                 super->updates_pending++;
10054                         }
10055                 }
10056                 break;
10057         }
10058         case update_takeover: {
10059                 struct imsm_update_takeover *u = (void *)update->buf;
10060                 if (apply_takeover_update(u, super, &update->space_list)) {
10061                         imsm_update_version_info(super);
10062                         super->updates_pending++;
10063                 }
10064                 break;
10065         }
10066
10067         case update_reshape_container_disks: {
10068                 struct imsm_update_reshape *u = (void *)update->buf;
10069                 if (apply_reshape_container_disks_update(
10070                             u, super, &update->space_list))
10071                         super->updates_pending++;
10072                 break;
10073         }
10074         case update_reshape_migration: {
10075                 struct imsm_update_reshape_migration *u = (void *)update->buf;
10076                 if (apply_reshape_migration_update(
10077                             u, super, &update->space_list))
10078                         super->updates_pending++;
10079                 break;
10080         }
10081         case update_size_change: {
10082                 struct imsm_update_size_change *u = (void *)update->buf;
10083                 if (apply_size_change_update(u, super))
10084                         super->updates_pending++;
10085                 break;
10086         }
10087         case update_activate_spare: {
10088                 struct imsm_update_activate_spare *u = (void *) update->buf;
10089
10090                 if (prepare_spare_to_activate(st, u) &&
10091                     apply_update_activate_spare(u, super, st->arrays))
10092                         super->updates_pending++;
10093                 break;
10094         }
10095         case update_create_array: {
10096                 /* someone wants to create a new array, we need to be aware of
10097                  * a few races/collisions:
10098                  * 1/ 'Create' called by two separate instances of mdadm
10099                  * 2/ 'Create' versus 'activate_spare': mdadm has chosen
10100                  *     devices that have since been assimilated via
10101                  *     activate_spare.
10102                  * In the event this update can not be carried out mdadm will
10103                  * (FIX ME) notice that its update did not take hold.
10104                  */
10105                 struct imsm_update_create_array *u = (void *) update->buf;
10106                 struct intel_dev *dv;
10107                 struct imsm_dev *dev;
10108                 struct imsm_map *map, *new_map;
10109                 unsigned long long start, end;
10110                 unsigned long long new_start, new_end;
10111                 int i;
10112                 struct disk_info *inf;
10113                 struct dl *dl;
10114
10115                 /* handle racing creates: first come first serve */
10116                 if (u->dev_idx < mpb->num_raid_devs) {
10117                         dprintf("subarray %d already defined\n", u->dev_idx);
10118                         goto create_error;
10119                 }
10120
10121                 /* check update is next in sequence */
10122                 if (u->dev_idx != mpb->num_raid_devs) {
10123                         dprintf("can not create array %d expected index %d\n",
10124                                 u->dev_idx, mpb->num_raid_devs);
10125                         goto create_error;
10126                 }
10127
10128                 new_map = get_imsm_map(&u->dev, MAP_0);
10129                 new_start = pba_of_lba0(new_map);
10130                 new_end = new_start + per_dev_array_size(new_map);
10131                 inf = get_disk_info(u);
10132
10133                 /* handle activate_spare versus create race:
10134                  * check to make sure that overlapping arrays do not include
10135                  * overalpping disks
10136                  */
10137                 for (i = 0; i < mpb->num_raid_devs; i++) {
10138                         dev = get_imsm_dev(super, i);
10139                         map = get_imsm_map(dev, MAP_0);
10140                         start = pba_of_lba0(map);
10141                         end = start + per_dev_array_size(map);
10142                         if ((new_start >= start && new_start <= end) ||
10143                             (start >= new_start && start <= new_end))
10144                                 /* overlap */;
10145                         else
10146                                 continue;
10147
10148                         if (disks_overlap(super, i, u)) {
10149                                 dprintf("arrays overlap\n");
10150                                 goto create_error;
10151                         }
10152                 }
10153
10154                 /* check that prepare update was successful */
10155                 if (!update->space) {
10156                         dprintf("prepare update failed\n");
10157                         goto create_error;
10158                 }
10159
10160                 /* check that all disks are still active before committing
10161                  * changes.  FIXME: could we instead handle this by creating a
10162                  * degraded array?  That's probably not what the user expects,
10163                  * so better to drop this update on the floor.
10164                  */
10165                 for (i = 0; i < new_map->num_members; i++) {
10166                         dl = serial_to_dl(inf[i].serial, super);
10167                         if (!dl) {
10168                                 dprintf("disk disappeared\n");
10169                                 goto create_error;
10170                         }
10171                 }
10172
10173                 super->updates_pending++;
10174
10175                 /* convert spares to members and fixup ord_tbl */
10176                 for (i = 0; i < new_map->num_members; i++) {
10177                         dl = serial_to_dl(inf[i].serial, super);
10178                         if (dl->index == -1) {
10179                                 dl->index = mpb->num_disks;
10180                                 mpb->num_disks++;
10181                                 dl->disk.status |= CONFIGURED_DISK;
10182                                 dl->disk.status &= ~SPARE_DISK;
10183                         }
10184                         set_imsm_ord_tbl_ent(new_map, i, dl->index);
10185                 }
10186
10187                 dv = update->space;
10188                 dev = dv->dev;
10189                 update->space = NULL;
10190                 imsm_copy_dev(dev, &u->dev);
10191                 dv->index = u->dev_idx;
10192                 dv->next = super->devlist;
10193                 super->devlist = dv;
10194                 mpb->num_raid_devs++;
10195
10196                 imsm_update_version_info(super);
10197                 break;
10198  create_error:
10199                 /* mdmon knows how to release update->space, but not
10200                  * ((struct intel_dev *) update->space)->dev
10201                  */
10202                 if (update->space) {
10203                         dv = update->space;
10204                         free(dv->dev);
10205                 }
10206                 break;
10207         }
10208         case update_kill_array: {
10209                 struct imsm_update_kill_array *u = (void *) update->buf;
10210                 int victim = u->dev_idx;
10211                 struct active_array *a;
10212                 struct intel_dev **dp;
10213
10214                 /* sanity check that we are not affecting the uuid of
10215                  * active arrays, or deleting an active array
10216                  *
10217                  * FIXME when immutable ids are available, but note that
10218                  * we'll also need to fixup the invalidated/active
10219                  * subarray indexes in mdstat
10220                  */
10221                 for (a = st->arrays; a; a = a->next)
10222                         if (a->info.container_member >= victim)
10223                                 break;
10224                 /* by definition if mdmon is running at least one array
10225                  * is active in the container, so checking
10226                  * mpb->num_raid_devs is just extra paranoia
10227                  */
10228                 if (a || mpb->num_raid_devs == 1 || victim >= super->anchor->num_raid_devs) {
10229                         dprintf("failed to delete subarray-%d\n", victim);
10230                         break;
10231                 }
10232
10233                 for (dp = &super->devlist; *dp;)
10234                         if ((*dp)->index == (unsigned)super->current_vol) {
10235                                 *dp = (*dp)->next;
10236                         } else {
10237                                 if ((*dp)->index > (unsigned)victim)
10238                                         (*dp)->index--;
10239                                 dp = &(*dp)->next;
10240                         }
10241                 mpb->num_raid_devs--;
10242                 super->updates_pending++;
10243                 break;
10244         }
10245         case update_rename_array: {
10246                 struct imsm_update_rename_array *u = (void *) update->buf;
10247                 char name[MAX_RAID_SERIAL_LEN+1];
10248                 int target = u->dev_idx;
10249                 struct active_array *a;
10250                 struct imsm_dev *dev;
10251
10252                 /* sanity check that we are not affecting the uuid of
10253                  * an active array
10254                  */
10255                 memset(name, 0, sizeof(name));
10256                 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
10257                 name[MAX_RAID_SERIAL_LEN] = '\0';
10258                 for (a = st->arrays; a; a = a->next)
10259                         if (a->info.container_member == target)
10260                                 break;
10261                 dev = get_imsm_dev(super, u->dev_idx);
10262
10263                 if (a || !dev || imsm_is_name_allowed(super, name, 0) == false) {
10264                         dprintf("failed to rename subarray-%d\n", target);
10265                         break;
10266                 }
10267
10268                 memcpy(dev->volume, name, MAX_RAID_SERIAL_LEN);
10269                 super->updates_pending++;
10270                 break;
10271         }
10272         case update_add_remove_disk: {
10273                 /* we may be able to repair some arrays if disks are
10274                  * being added, check the status of add_remove_disk
10275                  * if discs has been added.
10276                  */
10277                 if (add_remove_disk_update(super)) {
10278                         struct active_array *a;
10279
10280                         super->updates_pending++;
10281                         for (a = st->arrays; a; a = a->next)
10282                                 a->check_degraded = 1;
10283                 }
10284                 break;
10285         }
10286         case update_prealloc_badblocks_mem:
10287                 break;
10288         case update_rwh_policy: {
10289                 struct imsm_update_rwh_policy *u = (void *)update->buf;
10290                 int target = u->dev_idx;
10291                 struct imsm_dev *dev = get_imsm_dev(super, target);
10292
10293                 if (dev->rwh_policy != u->new_policy) {
10294                         dev->rwh_policy = u->new_policy;
10295                         super->updates_pending++;
10296                 }
10297                 break;
10298         }
10299         default:
10300                 pr_err("error: unsupported process update type:(type: %d)\n",   type);
10301         }
10302 }
10303
10304 static struct mdinfo *get_spares_for_grow(struct supertype *st);
10305
10306 static int imsm_prepare_update(struct supertype *st,
10307                                struct metadata_update *update)
10308 {
10309         /**
10310          * Allocate space to hold new disk entries, raid-device entries or a new
10311          * mpb if necessary.  The manager synchronously waits for updates to
10312          * complete in the monitor, so new mpb buffers allocated here can be
10313          * integrated by the monitor thread without worrying about live pointers
10314          * in the manager thread.
10315          */
10316         enum imsm_update_type type;
10317         struct intel_super *super = st->sb;
10318         unsigned int sector_size = super->sector_size;
10319         struct imsm_super *mpb = super->anchor;
10320         size_t buf_len;
10321         size_t len = 0;
10322
10323         if (update->len < (int)sizeof(type))
10324                 return 0;
10325
10326         type = *(enum imsm_update_type *) update->buf;
10327
10328         switch (type) {
10329         case update_general_migration_checkpoint:
10330                 if (update->len < (int)sizeof(struct imsm_update_general_migration_checkpoint))
10331                         return 0;
10332                 dprintf("called for update_general_migration_checkpoint\n");
10333                 break;
10334         case update_takeover: {
10335                 struct imsm_update_takeover *u = (void *)update->buf;
10336                 if (update->len < (int)sizeof(*u))
10337                         return 0;
10338                 if (u->direction == R0_TO_R10) {
10339                         void **tail = (void **)&update->space_list;
10340                         struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
10341                         struct imsm_map *map = get_imsm_map(dev, MAP_0);
10342                         int num_members = map->num_members;
10343                         void *space;
10344                         int size, i;
10345                         /* allocate memory for added disks */
10346                         for (i = 0; i < num_members; i++) {
10347                                 size = sizeof(struct dl);
10348                                 space = xmalloc(size);
10349                                 *tail = space;
10350                                 tail = space;
10351                                 *tail = NULL;
10352                         }
10353                         /* allocate memory for new device */
10354                         size = sizeof_imsm_dev(super->devlist->dev, 0) +
10355                                 (num_members * sizeof(__u32));
10356                         space = xmalloc(size);
10357                         *tail = space;
10358                         tail = space;
10359                         *tail = NULL;
10360                         len = disks_to_mpb_size(num_members * 2);
10361                 }
10362
10363                 break;
10364         }
10365         case update_reshape_container_disks: {
10366                 /* Every raid device in the container is about to
10367                  * gain some more devices, and we will enter a
10368                  * reconfiguration.
10369                  * So each 'imsm_map' will be bigger, and the imsm_vol
10370                  * will now hold 2 of them.
10371                  * Thus we need new 'struct imsm_dev' allocations sized
10372                  * as sizeof_imsm_dev but with more devices in both maps.
10373                  */
10374                 struct imsm_update_reshape *u = (void *)update->buf;
10375                 struct intel_dev *dl;
10376                 void **space_tail = (void**)&update->space_list;
10377
10378                 if (update->len < (int)sizeof(*u))
10379                         return 0;
10380
10381                 dprintf("for update_reshape\n");
10382
10383                 for (dl = super->devlist; dl; dl = dl->next) {
10384                         int size = sizeof_imsm_dev(dl->dev, 1);
10385                         void *s;
10386                         if (u->new_raid_disks > u->old_raid_disks)
10387                                 size += sizeof(__u32)*2*
10388                                         (u->new_raid_disks - u->old_raid_disks);
10389                         s = xmalloc(size);
10390                         *space_tail = s;
10391                         space_tail = s;
10392                         *space_tail = NULL;
10393                 }
10394
10395                 len = disks_to_mpb_size(u->new_raid_disks);
10396                 dprintf("New anchor length is %llu\n", (unsigned long long)len);
10397                 break;
10398         }
10399         case update_reshape_migration: {
10400                 /* for migration level 0->5 we need to add disks
10401                  * so the same as for container operation we will copy
10402                  * device to the bigger location.
10403                  * in memory prepared device and new disk area are prepared
10404                  * for usage in process update
10405                  */
10406                 struct imsm_update_reshape_migration *u = (void *)update->buf;
10407                 struct intel_dev *id;
10408                 void **space_tail = (void **)&update->space_list;
10409                 int size;
10410                 void *s;
10411                 int current_level = -1;
10412
10413                 if (update->len < (int)sizeof(*u))
10414                         return 0;
10415
10416                 dprintf("for update_reshape\n");
10417
10418                 /* add space for bigger array in update
10419                  */
10420                 for (id = super->devlist; id; id = id->next) {
10421                         if (id->index == (unsigned)u->subdev) {
10422                                 size = sizeof_imsm_dev(id->dev, 1);
10423                                 if (u->new_raid_disks > u->old_raid_disks)
10424                                         size += sizeof(__u32)*2*
10425                                         (u->new_raid_disks - u->old_raid_disks);
10426                                 s = xmalloc(size);
10427                                 *space_tail = s;
10428                                 space_tail = s;
10429                                 *space_tail = NULL;
10430                                 break;
10431                         }
10432                 }
10433                 if (update->space_list == NULL)
10434                         break;
10435
10436                 /* add space for disk in update
10437                  */
10438                 size = sizeof(struct dl);
10439                 s = xmalloc(size);
10440                 *space_tail = s;
10441                 space_tail = s;
10442                 *space_tail = NULL;
10443
10444                 /* add spare device to update
10445                  */
10446                 for (id = super->devlist ; id; id = id->next)
10447                         if (id->index == (unsigned)u->subdev) {
10448                                 struct imsm_dev *dev;
10449                                 struct imsm_map *map;
10450
10451                                 dev = get_imsm_dev(super, u->subdev);
10452                                 map = get_imsm_map(dev, MAP_0);
10453                                 current_level = map->raid_level;
10454                                 break;
10455                         }
10456                 if (u->new_level == 5 && u->new_level != current_level) {
10457                         struct mdinfo *spares;
10458
10459                         spares = get_spares_for_grow(st);
10460                         if (spares) {
10461                                 struct dl *dl;
10462                                 struct mdinfo *dev;
10463
10464                                 dev = spares->devs;
10465                                 if (dev) {
10466                                         u->new_disks[0] =
10467                                                 makedev(dev->disk.major,
10468                                                         dev->disk.minor);
10469                                         dl = get_disk_super(super,
10470                                                             dev->disk.major,
10471                                                             dev->disk.minor);
10472                                         dl->index = u->old_raid_disks;
10473                                         dev = dev->next;
10474                                 }
10475                                 sysfs_free(spares);
10476                         }
10477                 }
10478                 len = disks_to_mpb_size(u->new_raid_disks);
10479                 dprintf("New anchor length is %llu\n", (unsigned long long)len);
10480                 break;
10481         }
10482         case update_size_change: {
10483                 if (update->len < (int)sizeof(struct imsm_update_size_change))
10484                         return 0;
10485                 break;
10486         }
10487         case update_activate_spare: {
10488                 if (update->len < (int)sizeof(struct imsm_update_activate_spare))
10489                         return 0;
10490                 break;
10491         }
10492         case update_create_array: {
10493                 struct imsm_update_create_array *u = (void *) update->buf;
10494                 struct intel_dev *dv;
10495                 struct imsm_dev *dev = &u->dev;
10496                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
10497                 struct dl *dl;
10498                 struct disk_info *inf;
10499                 int i;
10500                 int activate = 0;
10501
10502                 if (update->len < (int)sizeof(*u))
10503                         return 0;
10504
10505                 inf = get_disk_info(u);
10506                 len = sizeof_imsm_dev(dev, 1);
10507                 /* allocate a new super->devlist entry */
10508                 dv = xmalloc(sizeof(*dv));
10509                 dv->dev = xmalloc(len);
10510                 update->space = dv;
10511
10512                 /* count how many spares will be converted to members */
10513                 for (i = 0; i < map->num_members; i++) {
10514                         dl = serial_to_dl(inf[i].serial, super);
10515                         if (!dl) {
10516                                 /* hmm maybe it failed?, nothing we can do about
10517                                  * it here
10518                                  */
10519                                 continue;
10520                         }
10521                         if (count_memberships(dl, super) == 0)
10522                                 activate++;
10523                 }
10524                 len += activate * sizeof(struct imsm_disk);
10525                 break;
10526         }
10527         case update_kill_array: {
10528                 if (update->len < (int)sizeof(struct imsm_update_kill_array))
10529                         return 0;
10530                 break;
10531         }
10532         case update_rename_array: {
10533                 if (update->len < (int)sizeof(struct imsm_update_rename_array))
10534                         return 0;
10535                 break;
10536         }
10537         case update_add_remove_disk:
10538                 /* no update->len needed */
10539                 break;
10540         case update_prealloc_badblocks_mem:
10541                 super->extra_space += sizeof(struct bbm_log) -
10542                         get_imsm_bbm_log_size(super->bbm_log);
10543                 break;
10544         case update_rwh_policy: {
10545                 if (update->len < (int)sizeof(struct imsm_update_rwh_policy))
10546                         return 0;
10547                 break;
10548         }
10549         default:
10550                 return 0;
10551         }
10552
10553         /* check if we need a larger metadata buffer */
10554         if (super->next_buf)
10555                 buf_len = super->next_len;
10556         else
10557                 buf_len = super->len;
10558
10559         if (__le32_to_cpu(mpb->mpb_size) + super->extra_space + len > buf_len) {
10560                 /* ok we need a larger buf than what is currently allocated
10561                  * if this allocation fails process_update will notice that
10562                  * ->next_len is set and ->next_buf is NULL
10563                  */
10564                 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) +
10565                                    super->extra_space + len, sector_size);
10566                 if (super->next_buf)
10567                         free(super->next_buf);
10568
10569                 super->next_len = buf_len;
10570                 if (posix_memalign(&super->next_buf, sector_size, buf_len) == 0)
10571                         memset(super->next_buf, 0, buf_len);
10572                 else
10573                         super->next_buf = NULL;
10574         }
10575         return 1;
10576 }
10577
10578 /* must be called while manager is quiesced */
10579 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
10580 {
10581         struct imsm_super *mpb = super->anchor;
10582         struct dl *iter;
10583         struct imsm_dev *dev;
10584         struct imsm_map *map;
10585         unsigned int i, j, num_members;
10586         __u32 ord, ord_map0;
10587         struct bbm_log *log = super->bbm_log;
10588
10589         dprintf("deleting device[%d] from imsm_super\n", index);
10590
10591         /* shift all indexes down one */
10592         for (iter = super->disks; iter; iter = iter->next)
10593                 if (iter->index > (int)index)
10594                         iter->index--;
10595         for (iter = super->missing; iter; iter = iter->next)
10596                 if (iter->index > (int)index)
10597                         iter->index--;
10598
10599         for (i = 0; i < mpb->num_raid_devs; i++) {
10600                 dev = get_imsm_dev(super, i);
10601                 map = get_imsm_map(dev, MAP_0);
10602                 num_members = map->num_members;
10603                 for (j = 0; j < num_members; j++) {
10604                         /* update ord entries being careful not to propagate
10605                          * ord-flags to the first map
10606                          */
10607                         ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
10608                         ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
10609
10610                         if (ord_to_idx(ord) <= index)
10611                                 continue;
10612
10613                         map = get_imsm_map(dev, MAP_0);
10614                         set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
10615                         map = get_imsm_map(dev, MAP_1);
10616                         if (map)
10617                                 set_imsm_ord_tbl_ent(map, j, ord - 1);
10618                 }
10619         }
10620
10621         for (i = 0; i < log->entry_count; i++) {
10622                 struct bbm_log_entry *entry = &log->marked_block_entries[i];
10623
10624                 if (entry->disk_ordinal <= index)
10625                         continue;
10626                 entry->disk_ordinal--;
10627         }
10628
10629         mpb->num_disks--;
10630         super->updates_pending++;
10631         if (*dlp) {
10632                 struct dl *dl = *dlp;
10633
10634                 *dlp = (*dlp)->next;
10635                 __free_imsm_disk(dl, 1);
10636         }
10637 }
10638
10639 static int imsm_get_allowed_degradation(int level, int raid_disks,
10640                                         struct intel_super *super,
10641                                         struct imsm_dev *dev)
10642 {
10643         switch (level) {
10644         case 1:
10645         case 10:{
10646                 int ret_val = 0;
10647                 struct imsm_map *map;
10648                 int i;
10649
10650                 ret_val = raid_disks/2;
10651                 /* check map if all disks pairs not failed
10652                  * in both maps
10653                  */
10654                 map = get_imsm_map(dev, MAP_0);
10655                 for (i = 0; i < ret_val; i++) {
10656                         int degradation = 0;
10657                         if (get_imsm_disk(super, i) == NULL)
10658                                 degradation++;
10659                         if (get_imsm_disk(super, i + 1) == NULL)
10660                                 degradation++;
10661                         if (degradation == 2)
10662                                 return 0;
10663                 }
10664                 map = get_imsm_map(dev, MAP_1);
10665                 /* if there is no second map
10666                  * result can be returned
10667                  */
10668                 if (map == NULL)
10669                         return ret_val;
10670                 /* check degradation in second map
10671                  */
10672                 for (i = 0; i < ret_val; i++) {
10673                         int degradation = 0;
10674                 if (get_imsm_disk(super, i) == NULL)
10675                                 degradation++;
10676                         if (get_imsm_disk(super, i + 1) == NULL)
10677                                 degradation++;
10678                         if (degradation == 2)
10679                                 return 0;
10680                 }
10681                 return ret_val;
10682         }
10683         case 5:
10684                 return 1;
10685         case 6:
10686                 return 2;
10687         default:
10688                 return 0;
10689         }
10690 }
10691
10692 /*******************************************************************************
10693  * Function:    validate_container_imsm
10694  * Description: This routine validates container after assemble,
10695  *              eg. if devices in container are under the same controller.
10696  *
10697  * Parameters:
10698  *      info    : linked list with info about devices used in array
10699  * Returns:
10700  *      1 : HBA mismatch
10701  *      0 : Success
10702  ******************************************************************************/
10703 int validate_container_imsm(struct mdinfo *info)
10704 {
10705         if (check_no_platform())
10706                 return 0;
10707
10708         struct sys_dev *idev;
10709         struct sys_dev *hba = NULL;
10710         struct sys_dev *intel_devices = find_intel_devices();
10711         char *dev_path = devt_to_devpath(makedev(info->disk.major,
10712                                                  info->disk.minor), 1, NULL);
10713
10714         for (idev = intel_devices; idev; idev = idev->next) {
10715                 if (dev_path && strstr(dev_path, idev->path)) {
10716                         hba = idev;
10717                         break;
10718                 }
10719         }
10720         if (dev_path)
10721                 free(dev_path);
10722
10723         if (!hba) {
10724                 pr_err("WARNING - Cannot detect HBA for device %s!\n",
10725                                 devid2kname(makedev(info->disk.major, info->disk.minor)));
10726                 return 1;
10727         }
10728
10729         const struct imsm_orom *orom = get_orom_by_device_id(hba->dev_id);
10730         struct mdinfo *dev;
10731
10732         for (dev = info->next; dev; dev = dev->next) {
10733                 dev_path = devt_to_devpath(makedev(dev->disk.major,
10734                                                    dev->disk.minor), 1, NULL);
10735
10736                 struct sys_dev *hba2 = NULL;
10737                 for (idev = intel_devices; idev; idev = idev->next) {
10738                         if (dev_path && strstr(dev_path, idev->path)) {
10739                                 hba2 = idev;
10740                                 break;
10741                         }
10742                 }
10743                 if (dev_path)
10744                         free(dev_path);
10745
10746                 const struct imsm_orom *orom2 = hba2 == NULL ? NULL :
10747                                 get_orom_by_device_id(hba2->dev_id);
10748
10749                 if (hba2 && hba->type != hba2->type) {
10750                         pr_err("WARNING - HBAs of devices do not match %s != %s\n",
10751                                 get_sys_dev_type(hba->type), get_sys_dev_type(hba2->type));
10752                         return 1;
10753                 }
10754
10755                 if (orom != orom2) {
10756                         pr_err("WARNING - IMSM container assembled with disks under different HBAs!\n"
10757                                 "       This operation is not supported and can lead to data loss.\n");
10758                         return 1;
10759                 }
10760
10761                 if (!orom) {
10762                         pr_err("WARNING - IMSM container assembled with disks under HBAs without IMSM platform support!\n"
10763                                 "       This operation is not supported and can lead to data loss.\n");
10764                         return 1;
10765                 }
10766         }
10767
10768         return 0;
10769 }
10770
10771 /*******************************************************************************
10772 * Function:   imsm_record_badblock
10773 * Description: This routine stores new bad block record in BBM log
10774 *
10775 * Parameters:
10776 *     a         : array containing a bad block
10777 *     slot      : disk number containing a bad block
10778 *     sector    : bad block sector
10779 *     length    : bad block sectors range
10780 * Returns:
10781 *     1 : Success
10782 *     0 : Error
10783 ******************************************************************************/
10784 static int imsm_record_badblock(struct active_array *a, int slot,
10785                           unsigned long long sector, int length)
10786 {
10787         struct intel_super *super = a->container->sb;
10788         int ord;
10789         int ret;
10790
10791         ord = imsm_disk_slot_to_ord(a, slot);
10792         if (ord < 0)
10793                 return 0;
10794
10795         ret = record_new_badblock(super->bbm_log, ord_to_idx(ord), sector,
10796                                    length);
10797         if (ret)
10798                 super->updates_pending++;
10799
10800         return ret;
10801 }
10802 /*******************************************************************************
10803 * Function:   imsm_clear_badblock
10804 * Description: This routine clears bad block record from BBM log
10805 *
10806 * Parameters:
10807 *     a         : array containing a bad block
10808 *     slot      : disk number containing a bad block
10809 *     sector    : bad block sector
10810 *     length    : bad block sectors range
10811 * Returns:
10812 *     1 : Success
10813 *     0 : Error
10814 ******************************************************************************/
10815 static int imsm_clear_badblock(struct active_array *a, int slot,
10816                         unsigned long long sector, int length)
10817 {
10818         struct intel_super *super = a->container->sb;
10819         int ord;
10820         int ret;
10821
10822         ord = imsm_disk_slot_to_ord(a, slot);
10823         if (ord < 0)
10824                 return 0;
10825
10826         ret = clear_badblock(super->bbm_log, ord_to_idx(ord), sector, length);
10827         if (ret)
10828                 super->updates_pending++;
10829
10830         return ret;
10831 }
10832 /*******************************************************************************
10833 * Function:   imsm_get_badblocks
10834 * Description: This routine get list of bad blocks for an array
10835 *
10836 * Parameters:
10837 *     a         : array
10838 *     slot      : disk number
10839 * Returns:
10840 *     bb        : structure containing bad blocks
10841 *     NULL      : error
10842 ******************************************************************************/
10843 static struct md_bb *imsm_get_badblocks(struct active_array *a, int slot)
10844 {
10845         int inst = a->info.container_member;
10846         struct intel_super *super = a->container->sb;
10847         struct imsm_dev *dev = get_imsm_dev(super, inst);
10848         struct imsm_map *map = get_imsm_map(dev, MAP_0);
10849         int ord;
10850
10851         ord = imsm_disk_slot_to_ord(a, slot);
10852         if (ord < 0)
10853                 return NULL;
10854
10855         get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map),
10856                              per_dev_array_size(map), &super->bb);
10857
10858         return &super->bb;
10859 }
10860 /*******************************************************************************
10861 * Function:   examine_badblocks_imsm
10862 * Description: Prints list of bad blocks on a disk to the standard output
10863 *
10864 * Parameters:
10865 *     st        : metadata handler
10866 *     fd        : open file descriptor for device
10867 *     devname   : device name
10868 * Returns:
10869 *     0 : Success
10870 *     1 : Error
10871 ******************************************************************************/
10872 static int examine_badblocks_imsm(struct supertype *st, int fd, char *devname)
10873 {
10874         struct intel_super *super = st->sb;
10875         struct bbm_log *log = super->bbm_log;
10876         struct dl *d = NULL;
10877         int any = 0;
10878
10879         for (d = super->disks; d ; d = d->next) {
10880                 if (strcmp(d->devname, devname) == 0)
10881                         break;
10882         }
10883
10884         if ((d == NULL) || (d->index < 0)) { /* serial mismatch probably */
10885                 pr_err("%s doesn't appear to be part of a raid array\n",
10886                        devname);
10887                 return 1;
10888         }
10889
10890         if (log != NULL) {
10891                 unsigned int i;
10892                 struct bbm_log_entry *entry = &log->marked_block_entries[0];
10893
10894                 for (i = 0; i < log->entry_count; i++) {
10895                         if (entry[i].disk_ordinal == d->index) {
10896                                 unsigned long long sector = __le48_to_cpu(
10897                                         &entry[i].defective_block_start);
10898                                 int cnt = entry[i].marked_count + 1;
10899
10900                                 if (!any) {
10901                                         printf("Bad-blocks on %s:\n", devname);
10902                                         any = 1;
10903                                 }
10904
10905                                 printf("%20llu for %d sectors\n", sector, cnt);
10906                         }
10907                 }
10908         }
10909
10910         if (!any)
10911                 printf("No bad-blocks list configured on %s\n", devname);
10912
10913         return 0;
10914 }
/*******************************************************************************
 * Function:    init_migr_record_imsm
 * Description: Zeroes the in-memory migration record, fills in the geometry
 *              of the migration (unit depth, blocks per unit, number of
 *              units, post-migration capacity, checkpoint area position)
 *              and writes the record with write_imsm_migr_rec().
 * Parameters:
 *      st      : supertype; st->sb is the imsm internal array info
 *      dev     : device under migration, both of its maps are read
 *      info    : general array info, component_size is used to compute the
 *                number of migration units
 * Returns:
 *      none
 ******************************************************************************/
void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
                           struct mdinfo *info)
{
        struct intel_super *super = st->sb;
        struct migr_record *migr_rec = super->migr_rec;
        int new_data_disks;
        unsigned long long dsize, dev_sectors;
        /* start from the largest value so that any real disk is smaller */
        long long unsigned min_dev_sectors = -1LLU;
        /* NOTE(review): map_src is dereferenced without a NULL check, so a
         * second map is presumably guaranteed by the callers - confirm
         */
        struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
        struct imsm_map *map_src = get_imsm_map(dev, MAP_1);
        unsigned long long num_migr_units;
        unsigned long long array_blocks;
        struct dl *dl_disk = NULL;

        memset(migr_rec, 0, sizeof(struct migr_record));
        migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);

        /* only ascending reshape supported now */
        migr_rec->ascending_migr = __cpu_to_le32(1);

        /* Round GEN_MIGR_AREA_SIZE down to a whole multiple of the larger of
         * the two strip sizes (divide, then multiply by the same value).
         * Until the __cpu_to_le32() below dest_depth_per_unit holds a
         * CPU-order value, which is what the blocks_per_unit computation
         * relies on.
         * NOTE(review): blocks_per_strip is used here without an endian
         * conversion while save_backup_imsm() reads it with
         * __le16_to_cpu() - confirm.
         */
        migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
                max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
        migr_rec->dest_depth_per_unit *=
                max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
        new_data_disks = imsm_num_data_members(map_dest);
        migr_rec->blocks_per_unit =
                __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
        migr_rec->dest_depth_per_unit =
                __cpu_to_le32(migr_rec->dest_depth_per_unit);
        array_blocks = info->component_size * new_data_disks;
        num_migr_units =
                array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);

        /* a partial last unit still counts as a unit */
        if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
                num_migr_units++;
        set_num_migr_units(migr_rec, num_migr_units);

        migr_rec->post_migr_vol_cap =  dev->size_low;
        migr_rec->post_migr_vol_cap_hi = dev->size_high;

        /* Find the smallest dev */
        for (dl_disk =  super->disks; dl_disk ; dl_disk = dl_disk->next) {
                /* ignore spares in container */
                if (dl_disk->index < 0)
                        continue;
                /* NOTE(review): the result of get_dev_size() is not checked;
                 * if it can fail without writing dsize, dev_sectors is
                 * derived from an indeterminate value - confirm
                 */
                get_dev_size(dl_disk->fd, NULL, &dsize);
                dev_sectors = dsize / 512;
                if (dev_sectors < min_dev_sectors)
                        min_dev_sectors = dev_sectors;
        }
        /* the checkpoint area position is derived from the smallest disk,
         * RAID_DISK_RESERVED_BLOCKS_IMSM_HI sectors before its end
         */
        set_migr_chkp_area_pba(migr_rec, min_dev_sectors -
                                        RAID_DISK_RESERVED_BLOCKS_IMSM_HI);

        /* the result of the write is not checked here */
        write_imsm_migr_rec(st);

        return;
}
10982
10983 /*******************************************************************************
10984  * Function:    save_backup_imsm
10985  * Description: Function saves critical data stripes to Migration Copy Area
10986  *              and updates the current migration unit status.
10987  *              Use restore_stripes() to form a destination stripe,
10988  *              and to write it to the Copy Area.
10989  * Parameters:
10990  *      st              : supertype information
10991  *      dev             : imsm device that backup is saved for
10992  *      info            : general array info
10993  *      buf             : input buffer
10994  *      length          : length of data to backup (blocks_per_unit)
10995  * Returns:
10996  *       0 : success
10997  *,     -1 : fail
10998  ******************************************************************************/
10999 int save_backup_imsm(struct supertype *st,
11000                      struct imsm_dev *dev,
11001                      struct mdinfo *info,
11002                      void *buf,
11003                      int length)
11004 {
11005         int rv = -1;
11006         struct intel_super *super = st->sb;
11007         int i;
11008         struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
11009         int new_disks = map_dest->num_members;
11010         int dest_layout = 0;
11011         int dest_chunk, targets[new_disks];
11012         unsigned long long start, target_offsets[new_disks];
11013         int data_disks = imsm_num_data_members(map_dest);
11014
11015         for (i = 0; i < new_disks; i++) {
11016                 struct dl *dl_disk = get_imsm_dl_disk(super, i);
11017                 if (dl_disk && is_fd_valid(dl_disk->fd))
11018                         targets[i] = dl_disk->fd;
11019                 else
11020                         goto abort;
11021         }
11022
11023         start = info->reshape_progress * 512;
11024         for (i = 0; i < new_disks; i++) {
11025                 target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512;
11026                 /* move back copy area adderss, it will be moved forward
11027                  * in restore_stripes() using start input variable
11028                  */
11029                 target_offsets[i] -= start/data_disks;
11030         }
11031
11032         dest_layout = imsm_level_to_layout(map_dest->raid_level);
11033         dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
11034
11035         if (restore_stripes(targets, /* list of dest devices */
11036                             target_offsets, /* migration record offsets */
11037                             new_disks,
11038                             dest_chunk,
11039                             map_dest->raid_level,
11040                             dest_layout,
11041                             -1,    /* source backup file descriptor */
11042                             0,     /* input buf offset
11043                                     * always 0 buf is already offseted */
11044                             start,
11045                             length,
11046                             buf) != 0) {
11047                 pr_err("Error restoring stripes\n");
11048                 goto abort;
11049         }
11050
11051         rv = 0;
11052
11053 abort:
11054         return rv;
11055 }
11056
11057 /*******************************************************************************
11058  * Function:    save_checkpoint_imsm
11059  * Description: Function called for current unit status update
11060  *              in the migration record. It writes it to disk.
11061  * Parameters:
11062  *      super   : imsm internal array info
11063  *      info    : general array info
11064  * Returns:
11065  *      0: success
11066  *      1: failure
11067  *      2: failure, means no valid migration record
11068  *                 / no general migration in progress /
11069  ******************************************************************************/
11070 int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
11071 {
11072         struct intel_super *super = st->sb;
11073         unsigned long long blocks_per_unit;
11074         unsigned long long curr_migr_unit;
11075
11076         if (load_imsm_migr_rec(super) != 0) {
11077                 dprintf("imsm: ERROR: Cannot read migration record for checkpoint save.\n");
11078                 return 1;
11079         }
11080
11081         blocks_per_unit = __le32_to_cpu(super->migr_rec->blocks_per_unit);
11082         if (blocks_per_unit == 0) {
11083                 dprintf("imsm: no migration in progress.\n");
11084                 return 2;
11085         }
11086         curr_migr_unit = info->reshape_progress / blocks_per_unit;
11087         /* check if array is alligned to copy area
11088          * if it is not alligned, add one to current migration unit value
11089          * this can happend on array reshape finish only
11090          */
11091         if (info->reshape_progress % blocks_per_unit)
11092                 curr_migr_unit++;
11093
11094         set_current_migr_unit(super->migr_rec, curr_migr_unit);
11095         super->migr_rec->rec_status = __cpu_to_le32(state);
11096         set_migr_dest_1st_member_lba(super->migr_rec,
11097                         super->migr_rec->dest_depth_per_unit * curr_migr_unit);
11098
11099         if (write_imsm_migr_rec(st) < 0) {
11100                 dprintf("imsm: Cannot write migration record outside backup area\n");
11101                 return 1;
11102         }
11103
11104         return 0;
11105 }
11106
11107 /*******************************************************************************
11108  * Function:    recover_backup_imsm
11109  * Description: Function recovers critical data from the Migration Copy Area
11110  *              while assembling an array.
11111  * Parameters:
11112  *      super   : imsm internal array info
11113  *      info    : general array info
11114  * Returns:
11115  *      0 : success (or there is no data to recover)
11116  *      1 : fail
11117  ******************************************************************************/
11118 int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
11119 {
11120         struct intel_super *super = st->sb;
11121         struct migr_record *migr_rec = super->migr_rec;
11122         struct imsm_map *map_dest;
11123         struct intel_dev *id = NULL;
11124         unsigned long long read_offset;
11125         unsigned long long write_offset;
11126         unsigned unit_len;
11127         int new_disks, err;
11128         char *buf = NULL;
11129         int retval = 1;
11130         unsigned int sector_size = super->sector_size;
11131         unsigned long long curr_migr_unit = current_migr_unit(migr_rec);
11132         unsigned long long num_migr_units = get_num_migr_units(migr_rec);
11133         char buffer[SYSFS_MAX_BUF_SIZE];
11134         int skipped_disks = 0;
11135         struct dl *dl_disk;
11136
11137         err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, sizeof(buffer));
11138         if (err < 1)
11139                 return 1;
11140
11141         /* recover data only during assemblation */
11142         if (strncmp(buffer, "inactive", 8) != 0)
11143                 return 0;
11144         /* no data to recover */
11145         if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
11146                 return 0;
11147         if (curr_migr_unit >= num_migr_units)
11148                 return 1;
11149
11150         /* find device during reshape */
11151         for (id = super->devlist; id; id = id->next)
11152                 if (is_gen_migration(id->dev))
11153                         break;
11154         if (id == NULL)
11155                 return 1;
11156
11157         map_dest = get_imsm_map(id->dev, MAP_0);
11158         new_disks = map_dest->num_members;
11159
11160         read_offset = migr_chkp_area_pba(migr_rec) * 512;
11161
11162         write_offset = (migr_dest_1st_member_lba(migr_rec) +
11163                         pba_of_lba0(map_dest)) * 512;
11164
11165         unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
11166         if (posix_memalign((void **)&buf, sector_size, unit_len) != 0)
11167                 goto abort;
11168
11169         for (dl_disk = super->disks; dl_disk; dl_disk = dl_disk->next) {
11170                 if (dl_disk->index < 0)
11171                         continue;
11172
11173                 if (!is_fd_valid(dl_disk->fd)) {
11174                         skipped_disks++;
11175                         continue;
11176                 }
11177                 if (lseek64(dl_disk->fd, read_offset, SEEK_SET) < 0) {
11178                         pr_err("Cannot seek to block: %s\n",
11179                                strerror(errno));
11180                         skipped_disks++;
11181                         continue;
11182                 }
11183                 if (read(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
11184                         pr_err("Cannot read copy area block: %s\n",
11185                                strerror(errno));
11186                         skipped_disks++;
11187                         continue;
11188                 }
11189                 if (lseek64(dl_disk->fd, write_offset, SEEK_SET) < 0) {
11190                         pr_err("Cannot seek to block: %s\n",
11191                                strerror(errno));
11192                         skipped_disks++;
11193                         continue;
11194                 }
11195                 if (write(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
11196                         pr_err("Cannot restore block: %s\n",
11197                                strerror(errno));
11198                         skipped_disks++;
11199                         continue;
11200                 }
11201         }
11202
11203         if (skipped_disks > imsm_get_allowed_degradation(info->new_level,
11204                                                          new_disks,
11205                                                          super,
11206                                                          id->dev)) {
11207                 pr_err("Cannot restore data from backup. Too many failed disks\n");
11208                 goto abort;
11209         }
11210
11211         if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) {
11212                 /* ignore error == 2, this can mean end of reshape here
11213                  */
11214                 dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL) during restart\n");
11215         } else
11216                 retval = 0;
11217
11218 abort:
11219         free(buf);
11220         return retval;
11221 }
11222
11223 /**
11224  * test_and_add_drive_controller_policy_imsm() - add disk controller to policies list.
11225  * @type: Policy type to search on list.
11226  * @pols: List of currently recorded policies.
11227  * @disk_fd: File descriptor of the device to check.
11228  * @hba: The hba disk is attached, could be NULL if verification is disabled.
11229  * @verbose: verbose flag.
11230  *
11231  * IMSM cares about drive physical placement. If @hba is not set, it adds unknown policy.
11232  * If there is no controller policy on pols we are free to add first one. If there is a policy then,
11233  * new must be the same - no controller mixing allowed.
11234  */
11235 static mdadm_status_t
11236 test_and_add_drive_controller_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd,
11237                                           struct sys_dev *hba, const int verbose)
11238 {
11239         const char *controller_policy = get_sys_dev_type(SYS_DEV_UNKNOWN);
11240         struct dev_policy *pol = pol_find(*pols, (char *)type);
11241         char devname[MAX_RAID_SERIAL_LEN];
11242
11243         if (hba)
11244                 controller_policy = get_sys_dev_type(hba->type);
11245
11246         if (!pol) {
11247                 pol_add(pols, (char *)type, (char *)controller_policy, "imsm");
11248                 return MDADM_STATUS_SUCCESS;
11249         }
11250
11251         if (strcmp(pol->value, controller_policy) == 0)
11252                 return MDADM_STATUS_SUCCESS;
11253
11254         fd2devname(disk_fd, devname);
11255         pr_vrb("Intel(R) raid controller \"%s\" found for %s, but \"%s\" was detected earlier\n",
11256                controller_policy, devname, pol->value);
11257         pr_vrb("Disks under different controllers cannot be used, aborting\n");
11258
11259         return MDADM_STATUS_ERROR;
11260 }
11261
/*
 * One drive policy known to IMSM: the policy type string and the callback
 * that checks a disk against the policies recorded so far and adds the
 * disk's own value when it is compatible.
 */
struct imsm_drive_policy {
        /* policy type, handed to the callback as its first argument */
        char *type;
        /* returns MDADM_STATUS_SUCCESS or an error status for the disk */
        mdadm_status_t (*test_and_add_drive_policy)(const char * const type,
                                                    struct dev_policy **pols, int disk_fd,
                                                    struct sys_dev *hba, const int verbose);
};

/* Table of policies walked by test_and_add_drive_policies_imsm(). */
struct imsm_drive_policy imsm_policies[] = {
        {"controller", test_and_add_drive_controller_policy_imsm},
};
11272
11273 mdadm_status_t test_and_add_drive_policies_imsm(struct dev_policy **pols, int disk_fd,
11274                                                 const int verbose)
11275 {
11276         struct imsm_drive_policy *imsm_pol;
11277         struct sys_dev *hba = NULL;
11278         char path[PATH_MAX];
11279         mdadm_status_t ret;
11280         unsigned int i;
11281
11282         /* If imsm platform verification is disabled, do not search for hba. */
11283         if (check_no_platform() != 1) {
11284                 if (!diskfd_to_devpath(disk_fd, 1, path)) {
11285                         pr_vrb("IMSM: Failed to retrieve device path by file descriptor.\n");
11286                         return MDADM_STATUS_ERROR;
11287                 }
11288
11289                 hba = find_disk_attached_hba(disk_fd, path);
11290                 if (!hba) {
11291                         pr_vrb("IMSM: Failed to find hba for %s\n", path);
11292                         return MDADM_STATUS_ERROR;
11293                 }
11294         }
11295
11296         for (i = 0; i < ARRAY_SIZE(imsm_policies); i++) {
11297                 imsm_pol = &imsm_policies[i];
11298
11299                 ret = imsm_pol->test_and_add_drive_policy(imsm_pol->type, pols, disk_fd, hba,
11300                                                           verbose);
11301                 if (ret != MDADM_STATUS_SUCCESS)
11302                         /* Inherit error code */
11303                         return ret;
11304         }
11305
11306         return MDADM_STATUS_SUCCESS;
11307 }
11308
11309 /**
11310  * get_spare_criteria_imsm() - set spare criteria.
11311  * @st: supertype.
11312  * @mddev_path: path to md device devnode, it must be container.
11313  * @c: spare_criteria struct to fill, not NULL.
11314  *
11315  * If superblock is not loaded, use mddev_path to load_container. It must be given in this case.
11316  * Filles size and sector size accordingly to superblock.
11317  */
11318 mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path,
11319                                        struct spare_criteria *c)
11320 {
11321         mdadm_status_t ret = MDADM_STATUS_ERROR;
11322         bool free_superblock = false;
11323         unsigned long long size = 0;
11324         struct intel_super *super;
11325         struct extent *e;
11326         struct dl *dl;
11327         int i;
11328
11329         /* If no superblock and no mddev_path, we cannot load superblock. */
11330         assert(st->sb || mddev_path);
11331
11332         if (mddev_path) {
11333                 int fd = open(mddev_path, O_RDONLY);
11334                 mdadm_status_t rv;
11335
11336                 if (!is_fd_valid(fd))
11337                         return MDADM_STATUS_ERROR;
11338
11339                 if (!st->sb) {
11340                         if (load_container_imsm(st, fd, st->devnm)) {
11341                                 close(fd);
11342                                 return MDADM_STATUS_ERROR;
11343                         }
11344                         free_superblock = true;
11345                 }
11346
11347                 rv = mddev_test_and_add_drive_policies(st, &c->pols, fd, 0);
11348                 close(fd);
11349
11350                 if (rv != MDADM_STATUS_SUCCESS)
11351                         goto out;
11352         }
11353
11354         super = st->sb;
11355
11356         /* find first active disk in array */
11357         dl = super->disks;
11358         while (dl && (is_failed(&dl->disk) || dl->index == -1))
11359                 dl = dl->next;
11360
11361         if (!dl)
11362                 goto out;
11363
11364         /* find last lba used by subarrays */
11365         e = get_extents(super, dl, 0);
11366         if (!e)
11367                 goto out;
11368
11369         for (i = 0; e[i].size; i++)
11370                 continue;
11371         if (i > 0)
11372                 size = e[i - 1].start + e[i - 1].size;
11373         free(e);
11374
11375         /* add the amount of space needed for metadata */
11376         size += imsm_min_reserved_sectors(super);
11377
11378         c->min_size = size * 512;
11379         c->sector_size = super->sector_size;
11380         c->criteria_set = true;
11381         ret = MDADM_STATUS_SUCCESS;
11382
11383 out:
11384         if (free_superblock)
11385                 free_super_imsm(st);
11386
11387         if (ret != MDADM_STATUS_SUCCESS)
11388                 c->criteria_set = false;
11389
11390         return ret;
11391 }
11392
11393 static char *imsm_find_array_devnm_by_subdev(int subdev, char *container)
11394 {
11395         static char devnm[32];
11396         char subdev_name[20];
11397         struct mdstat_ent *mdstat;
11398
11399         sprintf(subdev_name, "%d", subdev);
11400         mdstat = mdstat_by_subdev(subdev_name, container);
11401         if (!mdstat)
11402                 return NULL;
11403
11404         strcpy(devnm, mdstat->devnm);
11405         free_mdstat(mdstat);
11406         return devnm;
11407 }
11408
11409 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
11410                                                 struct geo_params *geo,
11411                                                 int *old_raid_disks,
11412                                                 int direction)
11413 {
11414         /* currently we only support increasing the number of devices
11415          * for a container.  This increases the number of device for each
11416          * member array.  They must all be RAID0 or RAID5.
11417          */
11418         int ret_val = 0;
11419         struct mdinfo *info, *member;
11420         int devices_that_can_grow = 0;
11421
11422         dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): st->devnm = (%s)\n", st->devnm);
11423
11424         if (geo->size > 0 ||
11425             geo->level != UnSet ||
11426             geo->layout != UnSet ||
11427             geo->chunksize != 0 ||
11428             geo->raid_disks == UnSet) {
11429                 dprintf("imsm: Container operation is allowed for raid disks number change only.\n");
11430                 return ret_val;
11431         }
11432
11433         if (direction == ROLLBACK_METADATA_CHANGES) {
11434                 dprintf("imsm: Metadata changes rollback is not supported for container operation.\n");
11435                 return ret_val;
11436         }
11437
11438         info = container_content_imsm(st, NULL);
11439         for (member = info; member; member = member->next) {
11440                 char *result;
11441
11442                 dprintf("imsm: checking device_num: %i\n",
11443                         member->container_member);
11444
11445                 if (geo->raid_disks <= member->array.raid_disks) {
11446                         /* we work on container for Online Capacity Expansion
11447                          * only so raid_disks has to grow
11448                          */
11449                         dprintf("imsm: for container operation raid disks increase is required\n");
11450                         break;
11451                 }
11452
11453                 if (info->array.level != 0 && info->array.level != 5) {
11454                         /* we cannot use this container with other raid level
11455                          */
11456                         dprintf("imsm: for container operation wrong raid level (%i) detected\n",
11457                                 info->array.level);
11458                         break;
11459                 } else {
11460                         /* check for platform support
11461                          * for this raid level configuration
11462                          */
11463                         struct intel_super *super = st->sb;
11464                         if (!is_raid_level_supported(super->orom,
11465                                                      member->array.level,
11466                                                      geo->raid_disks)) {
11467                                 dprintf("platform does not support raid%d with %d disk%s\n",
11468                                          info->array.level,
11469                                          geo->raid_disks,
11470                                          geo->raid_disks > 1 ? "s" : "");
11471                                 break;
11472                         }
11473                         /* check if component size is aligned to chunk size
11474                          */
11475                         if (info->component_size %
11476                             (info->array.chunk_size/512)) {
11477                                 dprintf("Component size is not aligned to chunk size\n");
11478                                 break;
11479                         }
11480                 }
11481
11482                 if (*old_raid_disks &&
11483                     info->array.raid_disks != *old_raid_disks)
11484                         break;
11485                 *old_raid_disks = info->array.raid_disks;
11486
11487                 /* All raid5 and raid0 volumes in container
11488                  * have to be ready for Online Capacity Expansion
11489                  * so they need to be assembled.  We have already
11490                  * checked that no recovery etc is happening.
11491                  */
11492                 result = imsm_find_array_devnm_by_subdev(member->container_member,
11493                                                          st->container_devnm);
11494                 if (result == NULL) {
11495                         dprintf("imsm: cannot find array\n");
11496                         break;
11497                 }
11498                 devices_that_can_grow++;
11499         }
11500         sysfs_free(info);
11501         if (!member && devices_that_can_grow)
11502                 ret_val = 1;
11503
11504         if (ret_val)
11505                 dprintf("Container operation allowed\n");
11506         else
11507                 dprintf("Error: %i\n", ret_val);
11508
11509         return ret_val;
11510 }
11511
11512 /* Function: get_spares_for_grow
11513  * Description: Allocates memory and creates list of spare devices
11514  *              avaliable in container. Checks if spare drive size is acceptable.
11515  * Parameters: Pointer to the supertype structure
11516  * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
11517  *              NULL if fail
11518  */
11519 static struct mdinfo *get_spares_for_grow(struct supertype *st)
11520 {
11521         struct spare_criteria sc = {0};
11522         struct mdinfo *spares;
11523
11524         get_spare_criteria_imsm(st, NULL, &sc);
11525         spares = container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
11526
11527         dev_policy_free(sc.pols);
11528
11529         return spares;
11530 }
11531
11532 /******************************************************************************
11533  * function: imsm_create_metadata_update_for_reshape
11534  * Function creates update for whole IMSM container.
11535  *
11536  ******************************************************************************/
11537 static int imsm_create_metadata_update_for_reshape(
11538         struct supertype *st,
11539         struct geo_params *geo,
11540         int old_raid_disks,
11541         struct imsm_update_reshape **updatep)
11542 {
11543         struct intel_super *super = st->sb;
11544         struct imsm_super *mpb = super->anchor;
11545         int update_memory_size;
11546         struct imsm_update_reshape *u;
11547         struct mdinfo *spares;
11548         int i;
11549         int delta_disks;
11550         struct mdinfo *dev;
11551
11552         dprintf("(enter) raid_disks = %i\n", geo->raid_disks);
11553
11554         delta_disks = geo->raid_disks - old_raid_disks;
11555
11556         /* size of all update data without anchor */
11557         update_memory_size = sizeof(struct imsm_update_reshape);
11558
11559         /* now add space for spare disks that we need to add. */
11560         update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
11561
11562         u = xcalloc(1, update_memory_size);
11563         u->type = update_reshape_container_disks;
11564         u->old_raid_disks = old_raid_disks;
11565         u->new_raid_disks = geo->raid_disks;
11566
11567         /* now get spare disks list
11568          */
11569         spares = get_spares_for_grow(st);
11570
11571         if (spares == NULL || delta_disks > spares->array.spare_disks) {
11572                 pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name);
11573                 i = -1;
11574                 goto abort;
11575         }
11576
11577         /* we have got spares
11578          * update disk list in imsm_disk list table in anchor
11579          */
11580         dprintf("imsm: %i spares are available.\n\n",
11581                 spares->array.spare_disks);
11582
11583         dev = spares->devs;
11584         for (i = 0; i < delta_disks; i++) {
11585                 struct dl *dl;
11586
11587                 if (dev == NULL)
11588                         break;
11589                 u->new_disks[i] = makedev(dev->disk.major,
11590                                           dev->disk.minor);
11591                 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
11592                 dl->index = mpb->num_disks;
11593                 mpb->num_disks++;
11594                 dev = dev->next;
11595         }
11596
11597 abort:
11598         /* free spares
11599          */
11600         sysfs_free(spares);
11601
11602         dprintf("imsm: reshape update preparation :");
11603         if (i == delta_disks) {
11604                 dprintf_cont(" OK\n");
11605                 *updatep = u;
11606                 return update_memory_size;
11607         }
11608         free(u);
11609         dprintf_cont(" Error\n");
11610
11611         return 0;
11612 }
11613
11614 /******************************************************************************
11615  * function: imsm_create_metadata_update_for_size_change()
11616  *           Creates update for IMSM array for array size change.
11617  *
11618  ******************************************************************************/
11619 static int imsm_create_metadata_update_for_size_change(
11620                                 struct supertype *st,
11621                                 struct geo_params *geo,
11622                                 struct imsm_update_size_change **updatep)
11623 {
11624         struct intel_super *super = st->sb;
11625         int update_memory_size;
11626         struct imsm_update_size_change *u;
11627
11628         dprintf("(enter) New size = %llu\n", geo->size);
11629
11630         /* size of all update data without anchor */
11631         update_memory_size = sizeof(struct imsm_update_size_change);
11632
11633         u = xcalloc(1, update_memory_size);
11634         u->type = update_size_change;
11635         u->subdev = super->current_vol;
11636         u->new_size = geo->size;
11637
11638         dprintf("imsm: reshape update preparation : OK\n");
11639         *updatep = u;
11640
11641         return update_memory_size;
11642 }
11643
11644 /******************************************************************************
11645  * function: imsm_create_metadata_update_for_migration()
11646  *           Creates update for IMSM array.
11647  *
11648  ******************************************************************************/
11649 static int imsm_create_metadata_update_for_migration(
11650                                         struct supertype *st,
11651                                         struct geo_params *geo,
11652                                         struct imsm_update_reshape_migration **updatep)
11653 {
11654         struct intel_super *super = st->sb;
11655         int update_memory_size;
11656         int current_chunk_size;
11657         struct imsm_update_reshape_migration *u;
11658         struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
11659         struct imsm_map *map = get_imsm_map(dev, MAP_0);
11660         int previous_level = -1;
11661
11662         dprintf("(enter) New Level = %i\n", geo->level);
11663
11664         /* size of all update data without anchor */
11665         update_memory_size = sizeof(struct imsm_update_reshape_migration);
11666
11667         u = xcalloc(1, update_memory_size);
11668         u->type = update_reshape_migration;
11669         u->subdev = super->current_vol;
11670         u->new_level = geo->level;
11671         u->new_layout = geo->layout;
11672         u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
11673         u->new_disks[0] = -1;
11674         u->new_chunksize = -1;
11675
11676         current_chunk_size = __le16_to_cpu(map->blocks_per_strip) / 2;
11677
11678         if (geo->chunksize != current_chunk_size) {
11679                 u->new_chunksize = geo->chunksize / 1024;
11680                 dprintf("imsm: chunk size change from %i to %i\n",
11681                         current_chunk_size, u->new_chunksize);
11682         }
11683         previous_level = map->raid_level;
11684
11685         if (geo->level == 5 && previous_level == 0) {
11686                 struct mdinfo *spares = NULL;
11687
11688                 u->new_raid_disks++;
11689                 spares = get_spares_for_grow(st);
11690                 if (spares == NULL || spares->array.spare_disks < 1) {
11691                         free(u);
11692                         sysfs_free(spares);
11693                         update_memory_size = 0;
11694                         pr_err("cannot get spare device for requested migration\n");
11695                         return 0;
11696                 }
11697                 sysfs_free(spares);
11698         }
11699         dprintf("imsm: reshape update preparation : OK\n");
11700         *updatep = u;
11701
11702         return update_memory_size;
11703 }
11704
11705 static void imsm_update_metadata_locally(struct supertype *st,
11706                                          void *buf, int len)
11707 {
11708         struct metadata_update mu;
11709
11710         mu.buf = buf;
11711         mu.len = len;
11712         mu.space = NULL;
11713         mu.space_list = NULL;
11714         mu.next = NULL;
11715         if (imsm_prepare_update(st, &mu))
11716                 imsm_process_update(st, &mu);
11717
11718         while (mu.space_list) {
11719                 void **space = mu.space_list;
11720                 mu.space_list = *space;
11721                 free(space);
11722         }
11723 }
11724
11725 /**
11726  * imsm_analyze_expand() - check expand properties and calculate new size.
11727  * @st: imsm supertype.
11728  * @geo: new geometry params.
11729  * @array: array info.
11730  * @direction: reshape direction.
11731  *
11732  * Obtain free space after the &array and verify if expand to requested size is
11733  * possible. If geo->size is set to %MAX_SIZE, assume that max free size is
11734  * requested.
11735  *
11736  * Return:
11737  * On success %IMSM_STATUS_OK is returned, geo->size and geo->raid_disks are
11738  * updated.
11739  * On error, %IMSM_STATUS_ERROR is returned.
11740  */
11741 static imsm_status_t imsm_analyze_expand(struct supertype *st,
11742                                          struct geo_params *geo,
11743                                          struct mdinfo *array,
11744                                          int direction)
11745 {
11746         struct intel_super *super = st->sb;
11747         struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
11748         struct imsm_map *map = get_imsm_map(dev, MAP_0);
11749         int data_disks = imsm_num_data_members(map);
11750
11751         unsigned long long current_size;
11752         unsigned long long free_size;
11753         unsigned long long new_size;
11754         unsigned long long max_size;
11755
11756         const int chunk_kib = geo->chunksize / 1024;
11757         imsm_status_t rv;
11758
11759         if (direction == ROLLBACK_METADATA_CHANGES) {
11760                 /**
11761                  * Accept size for rollback only.
11762                  */
11763                 new_size = geo->size * 2;
11764                 goto success;
11765         }
11766
11767         if (data_disks == 0) {
11768                 pr_err("imsm: Cannot retrieve data disks.\n");
11769                 return IMSM_STATUS_ERROR;
11770         }
11771         current_size = array->custom_array_size / data_disks;
11772
11773         rv = imsm_get_free_size(super, dev->vol.map->num_members, 0, chunk_kib, &free_size, true);
11774         if (rv != IMSM_STATUS_OK) {
11775                 pr_err("imsm: Cannot find free space for expand.\n");
11776                 return IMSM_STATUS_ERROR;
11777         }
11778         max_size = round_member_size_to_mb(free_size + current_size);
11779
11780         if (geo->size == MAX_SIZE)
11781                 new_size = max_size;
11782         else
11783                 new_size = round_member_size_to_mb(geo->size * 2);
11784
11785         if (new_size == 0) {
11786                 pr_err("imsm: Rounded requested size is 0.\n");
11787                 return IMSM_STATUS_ERROR;
11788         }
11789
11790         if (new_size > max_size) {
11791                 pr_err("imsm: Rounded requested size (%llu) is larger than free space available (%llu).\n",
11792                        new_size, max_size);
11793                 return IMSM_STATUS_ERROR;
11794         }
11795
11796         if (new_size == current_size) {
11797                 pr_err("imsm: Rounded requested size (%llu) is same as current size (%llu).\n",
11798                        new_size, current_size);
11799                 return IMSM_STATUS_ERROR;
11800         }
11801
11802         if (new_size < current_size) {
11803                 pr_err("imsm: Size reduction is not supported, rounded requested size (%llu) is smaller than current (%llu).\n",
11804                        new_size, current_size);
11805                 return IMSM_STATUS_ERROR;
11806         }
11807
11808 success:
11809         dprintf("imsm: New size per member is %llu.\n", new_size);
11810         geo->size = data_disks * new_size;
11811         geo->raid_disks = dev->vol.map->num_members;
11812         return IMSM_STATUS_OK;
11813 }
11814
/***************************************************************************
* Function:     imsm_analyze_change
* Description:  Analyzes the requested geometry change for a single volume
*               and validates that the transition is supported.
*               Level, layout and chunk size that are not requested (or equal
*               to the current ones) are filled in geo from the volume info.
*               For a size change geo is updated by imsm_analyze_expand().
* Parameters:   st - supertype structure,
*               geo - requested geometry parameters (modified in place),
*               direction - metadata change direction (apply/rollback)
* Returns:      Operation type code (CH_MIGRATION, CH_TAKEOVER or
*               CH_ARRAY_SIZE) on success, -1 if fail or if no supported
*               change was detected
****************************************************************************/
enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                                           struct geo_params *geo,
                                           int direction)
{
        struct mdinfo info;
        int change = -1;
        /* set when the operation requires a single volume in the container */
        int check_devs = 0;
        int chunk;
        /* number of added/removed disks in operation result */
        int devNumChange = 0;
        /* imsm compatible layout value for array geometry verification */
        int imsm_layout = -1;
        imsm_status_t rv;

        /* info is used below as the current state of the volume */
        getinfo_super_imsm_volume(st, &info, NULL);

        /* Level change: only 0->5, 0->10 and 1/10->0 are accepted. */
        if (geo->level != info.array.level && geo->level >= 0 &&
            geo->level != UnSet) {
                switch (info.array.level) {
                case 0:
                        if (geo->level == 5) {
                                change = CH_MIGRATION;
                                if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) {
                                        pr_err("Error. Requested Layout not supported (left-asymmetric layout is supported only)!\n");
                                        change = -1;
                                        goto analyse_change_exit;
                                }
                                imsm_layout =  geo->layout;
                                check_devs = 1;
                                devNumChange = 1; /* parity disk added */
                        } else if (geo->level == 10) {
                                change = CH_TAKEOVER;
                                check_devs = 1;
                                devNumChange = 2; /* two mirrors added */
                                imsm_layout = 0x102; /* imsm supported layout */
                        }
                        break;
                case 1:
                case 10:
                        if (geo->level == 0) {
                                change = CH_TAKEOVER;
                                check_devs = 1;
                                devNumChange = -(geo->raid_disks/2);
                                imsm_layout = 0; /* imsm raid0 layout */
                        }
                        break;
                }
                /* no case above matched the requested transition */
                if (change == -1) {
                        pr_err("Error. Level Migration from %d to %d not supported!\n",
                               info.array.level, geo->level);
                        goto analyse_change_exit;
                }
        } else
                /* no level change requested, continue with the current level */
                geo->level = info.array.level;

        /* Layout change: only RAID5 layout 0 <-> 5 is accepted. Note that this
         * overrides a change type selected by the level check above.
         */
        if (geo->layout != info.array.layout &&
            (geo->layout != UnSet && geo->layout != -1)) {
                change = CH_MIGRATION;
                if (info.array.layout == 0 && info.array.level == 5 &&
                    geo->layout == 5) {
                        /* reshape 5 -> 4 */
                } else if (info.array.layout == 5 && info.array.level == 5 &&
                           geo->layout == 0) {
                        /* reshape 4 -> 5 */
                        geo->layout = 0;
                        geo->level = 5;
                } else {
                        pr_err("Error. Layout Migration from %d to %d not supported!\n",
                               info.array.layout, geo->layout);
                        change = -1;
                        goto analyse_change_exit;
                }
        } else {
                geo->layout = info.array.layout;
                /* keep the layout chosen by the level check, if any */
                if (imsm_layout == -1)
                        imsm_layout = info.array.layout;
        }

        /* Chunk size change: not possible for RAID10, and the component size
         * has to be a multiple of the new chunk (both compared in 512-byte units).
         */
        if (geo->chunksize > 0 && geo->chunksize != UnSet &&
            geo->chunksize != info.array.chunk_size) {
                if (info.array.level == 10) {
                        pr_err("Error. Chunk size change for RAID 10 is not supported.\n");
                        change = -1;
                        goto analyse_change_exit;
                } else if (info.component_size % (geo->chunksize/512)) {
                        pr_err("New chunk size (%dK) does not evenly divide device size (%lluk). Aborting...\n",
                               geo->chunksize/1024, info.component_size/2);
                        change = -1;
                        goto analyse_change_exit;
                }
                change = CH_MIGRATION;
        } else {
                geo->chunksize = info.array.chunk_size;
        }

        /* Size change: must not be combined with any change detected above. */
        if (geo->size > 0) {
                if (change != -1) {
                        pr_err("Error. Size change should be the only one at a time.\n");
                        change = -1;
                        goto analyse_change_exit;
                }

                rv = imsm_analyze_expand(st, geo, &info, direction);
                /* change is still -1 here, so a failed expand is reported as error */
                if (rv != IMSM_STATUS_OK)
                        goto analyse_change_exit;
                change = CH_ARRAY_SIZE;
        }

        /* Validate the resulting geometry, with the disk count adjusted by the
         * disks the operation adds or removes.
         */
        chunk = geo->chunksize / 1024;
        if (!validate_geometry_imsm(st,
                                    geo->level,
                                    imsm_layout,
                                    geo->raid_disks + devNumChange,
                                    &chunk,
                                    geo->size, INVALID_SECTORS,
                                    0, 0, info.consistency_policy, 1))
                change = -1;

        /* Level changes are refused when the container holds more than one volume. */
        if (check_devs) {
                struct intel_super *super = st->sb;
                struct imsm_super *mpb = super->anchor;

                if (mpb->num_raid_devs > 1) {
                        pr_err("Error. Cannot perform operation on %s- for this operation "
                               "it MUST be single array in container\n", geo->dev_name);
                        change = -1;
                }
        }

analyse_change_exit:
        /* only a size change can be rolled back */
        if (direction == ROLLBACK_METADATA_CHANGES &&
            (change == CH_MIGRATION || change == CH_TAKEOVER)) {
                dprintf("imsm: Metadata changes rollback is not supported for migration and takeover operations.\n");
                change = -1;
        }
        return change;
}
11959
11960 int imsm_takeover(struct supertype *st, struct geo_params *geo)
11961 {
11962         struct intel_super *super = st->sb;
11963         struct imsm_update_takeover *u;
11964
11965         u = xmalloc(sizeof(struct imsm_update_takeover));
11966
11967         u->type = update_takeover;
11968         u->subarray = super->current_vol;
11969
11970         /* 10->0 transition */
11971         if (geo->level == 0)
11972                 u->direction = R10_TO_R0;
11973
11974         /* 0->10 transition */
11975         if (geo->level == 10)
11976                 u->direction = R0_TO_R10;
11977
11978         /* update metadata locally */
11979         imsm_update_metadata_locally(st, u,
11980                                         sizeof(struct imsm_update_takeover));
11981         /* and possibly remotely */
11982         if (st->update_tail)
11983                 append_metadata_update(st, u,
11984                                         sizeof(struct imsm_update_takeover));
11985         else
11986                 free(u);
11987
11988         return 0;
11989 }
11990
11991 /* Flush size update if size calculated by num_data_stripes is higher than
11992  * imsm_dev_size to eliminate differences during reshape.
11993  * Mdmon will recalculate them correctly.
11994  * If subarray index is not set then check whole container.
11995  * Returns:
11996  *      0 - no error occurred
11997  *      1 - error detected
11998  */
11999 static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
12000 {
12001         struct intel_super *super = st->sb;
12002         int tmp = super->current_vol;
12003         int ret_val = 1;
12004         int i;
12005
12006         for (i = 0; i < super->anchor->num_raid_devs; i++) {
12007                 if (subarray_index >= 0 && i != subarray_index)
12008                         continue;
12009                 super->current_vol = i;
12010                 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
12011                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
12012                 unsigned int disc_count = imsm_num_data_members(map);
12013                 struct geo_params geo;
12014                 struct imsm_update_size_change *update;
12015                 unsigned long long calc_size = per_dev_array_size(map) * disc_count;
12016                 unsigned long long d_size = imsm_dev_size(dev);
12017                 int u_size;
12018
12019                 if (calc_size == d_size)
12020                         continue;
12021
12022                 /* There is a difference, confirm that imsm_dev_size is
12023                  * smaller and push update.
12024                  */
12025                 if (d_size > calc_size) {
12026                         pr_err("imsm: dev size of subarray %d is incorrect\n",
12027                                 i);
12028                         goto exit;
12029                 }
12030                 memset(&geo, 0, sizeof(struct geo_params));
12031                 geo.size = d_size;
12032                 u_size = imsm_create_metadata_update_for_size_change(st, &geo,
12033                                                                      &update);
12034                 imsm_update_metadata_locally(st, update, u_size);
12035                 if (st->update_tail) {
12036                         append_metadata_update(st, update, u_size);
12037                         flush_metadata_updates(st);
12038                         st->update_tail = &st->updates;
12039                 } else {
12040                         imsm_sync_metadata(st);
12041                         free(update);
12042                 }
12043         }
12044         ret_val = 0;
12045 exit:
12046         super->current_vol = tmp;
12047         return ret_val;
12048 }
12049
12050 static int imsm_reshape_super(struct supertype *st, unsigned long long size,
12051                               int level,
12052                               int layout, int chunksize, int raid_disks,
12053                               int delta_disks, char *backup, char *dev,
12054                               int direction, int verbose)
12055 {
12056         int ret_val = 1;
12057         struct geo_params geo;
12058
12059         dprintf("(enter)\n");
12060
12061         memset(&geo, 0, sizeof(struct geo_params));
12062
12063         geo.dev_name = dev;
12064         strcpy(geo.devnm, st->devnm);
12065         geo.size = size;
12066         geo.level = level;
12067         geo.layout = layout;
12068         geo.chunksize = chunksize;
12069         geo.raid_disks = raid_disks;
12070         if (delta_disks != UnSet)
12071                 geo.raid_disks += delta_disks;
12072
12073         dprintf("for level      : %i\n", geo.level);
12074         dprintf("for raid_disks : %i\n", geo.raid_disks);
12075
12076         if (strcmp(st->container_devnm, st->devnm) == 0) {
12077                 /* On container level we can only increase number of devices. */
12078                 dprintf("imsm: info: Container operation\n");
12079                 int old_raid_disks = 0;
12080
12081                 if (imsm_reshape_is_allowed_on_container(
12082                             st, &geo, &old_raid_disks, direction)) {
12083                         struct imsm_update_reshape *u = NULL;
12084                         int len;
12085
12086                         if (imsm_fix_size_mismatch(st, -1)) {
12087                                 dprintf("imsm: Cannot fix size mismatch\n");
12088                                 goto exit_imsm_reshape_super;
12089                         }
12090
12091                         len = imsm_create_metadata_update_for_reshape(
12092                                 st, &geo, old_raid_disks, &u);
12093
12094                         if (len <= 0) {
12095                                 dprintf("imsm: Cannot prepare update\n");
12096                                 goto exit_imsm_reshape_super;
12097                         }
12098
12099                         ret_val = 0;
12100                         /* update metadata locally */
12101                         imsm_update_metadata_locally(st, u, len);
12102                         /* and possibly remotely */
12103                         if (st->update_tail)
12104                                 append_metadata_update(st, u, len);
12105                         else
12106                                 free(u);
12107
12108                 } else {
12109                         pr_err("(imsm) Operation is not allowed on this container\n");
12110                 }
12111         } else {
12112                 /* On volume level we support following operations
12113                  * - takeover: raid10 -> raid0; raid0 -> raid10
12114                  * - chunk size migration
12115                  * - migration: raid5 -> raid0; raid0 -> raid5
12116                  */
12117                 struct intel_super *super = st->sb;
12118                 struct intel_dev *dev = super->devlist;
12119                 int change;
12120                 dprintf("imsm: info: Volume operation\n");
12121                 /* find requested device */
12122                 while (dev) {
12123                         char *devnm =
12124                                 imsm_find_array_devnm_by_subdev(
12125                                         dev->index, st->container_devnm);
12126                         if (devnm && strcmp(devnm, geo.devnm) == 0)
12127                                 break;
12128                         dev = dev->next;
12129                 }
12130                 if (dev == NULL) {
12131                         pr_err("Cannot find %s (%s) subarray\n",
12132                                 geo.dev_name, geo.devnm);
12133                         goto exit_imsm_reshape_super;
12134                 }
12135                 super->current_vol = dev->index;
12136                 change = imsm_analyze_change(st, &geo, direction);
12137                 switch (change) {
12138                 case CH_TAKEOVER:
12139                         ret_val = imsm_takeover(st, &geo);
12140                         break;
12141                 case CH_MIGRATION: {
12142                         struct imsm_update_reshape_migration *u = NULL;
12143                         int len =
12144                                 imsm_create_metadata_update_for_migration(
12145                                         st, &geo, &u);
12146                         if (len < 1) {
12147                                 dprintf("imsm: Cannot prepare update\n");
12148                                 break;
12149                         }
12150                         ret_val = 0;
12151                         /* update metadata locally */
12152                         imsm_update_metadata_locally(st, u, len);
12153                         /* and possibly remotely */
12154                         if (st->update_tail)
12155                                 append_metadata_update(st, u, len);
12156                         else
12157                                 free(u);
12158                 }
12159                 break;
12160                 case CH_ARRAY_SIZE: {
12161                         struct imsm_update_size_change *u = NULL;
12162                         int len =
12163                                 imsm_create_metadata_update_for_size_change(
12164                                         st, &geo, &u);
12165                         if (len < 1) {
12166                                 dprintf("imsm: Cannot prepare update\n");
12167                                 break;
12168                         }
12169                         ret_val = 0;
12170                         /* update metadata locally */
12171                         imsm_update_metadata_locally(st, u, len);
12172                         /* and possibly remotely */
12173                         if (st->update_tail)
12174                                 append_metadata_update(st, u, len);
12175                         else
12176                                 free(u);
12177                 }
12178                 break;
12179                 default:
12180                         ret_val = 1;
12181                 }
12182         }
12183
12184 exit_imsm_reshape_super:
12185         dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
12186         return ret_val;
12187 }
12188
/* Non-negative result codes of read_completed():
 *   COMPLETED_OK      - a numeric value was parsed and stored
 *   COMPLETED_NONE    - str_is_none() matched the text that was read
 *   COMPLETED_DELAYED - the text that was read starts with "delayed"
 */
#define COMPLETED_OK            0
#define COMPLETED_NONE          1
#define COMPLETED_DELAYED       2
12192
12193 static int read_completed(int fd, unsigned long long *val)
12194 {
12195         int ret;
12196         char buf[SYSFS_MAX_BUF_SIZE];
12197
12198         ret = sysfs_fd_get_str(fd, buf, sizeof(buf));
12199         if (ret < 0)
12200                 return ret;
12201
12202         ret = COMPLETED_OK;
12203         if (str_is_none(buf) == true) {
12204                 ret = COMPLETED_NONE;
12205         } else if (strncmp(buf, "delayed", 7) == 0) {
12206                 ret = COMPLETED_DELAYED;
12207         } else {
12208                 char *ep;
12209                 *val = strtoull(buf, &ep, 0);
12210                 if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
12211                         ret = -1;
12212         }
12213         return ret;
12214 }
12215
12216 /*******************************************************************************
12217  * Function:    wait_for_reshape_imsm
12218  * Description: Function writes new sync_max value and waits until
12219  *              reshape process reach new position
12220  * Parameters:
12221  *      sra             : general array info
12222  *      ndata           : number of disks in new array's layout
12223  * Returns:
12224  *       0 : success,
12225  *       1 : there is no reshape in progress,
12226  *      -1 : fail
12227  ******************************************************************************/
12228 int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
12229 {
12230         int fd = sysfs_get_fd(sra, NULL, "sync_completed");
12231         int retry = 3;
12232         unsigned long long completed;
12233         /* to_complete : new sync_max position */
12234         unsigned long long to_complete = sra->reshape_progress;
12235         unsigned long long position_to_set = to_complete / ndata;
12236
12237         if (!is_fd_valid(fd)) {
12238                 dprintf("cannot open reshape_position\n");
12239                 return 1;
12240         }
12241
12242         do {
12243                 if (sysfs_fd_get_ll(fd, &completed) < 0) {
12244                         if (!retry) {
12245                                 dprintf("cannot read reshape_position (no reshape in progres)\n");
12246                                 close(fd);
12247                                 return 1;
12248                         }
12249                         sleep_for(0, MSEC_TO_NSEC(30), true);
12250                 } else
12251                         break;
12252         } while (retry--);
12253
12254         if (completed > position_to_set) {
12255                 dprintf("wrong next position to set %llu (%llu)\n",
12256                         to_complete, position_to_set);
12257                 close(fd);
12258                 return -1;
12259         }
12260         dprintf("Position set: %llu\n", position_to_set);
12261         if (sysfs_set_num(sra, NULL, "sync_max",
12262                           position_to_set) != 0) {
12263                 dprintf("cannot set reshape position to %llu\n",
12264                         position_to_set);
12265                 close(fd);
12266                 return -1;
12267         }
12268
12269         do {
12270                 int rc;
12271                 char action[SYSFS_MAX_BUF_SIZE];
12272                 int timeout = 3000;
12273
12274                 sysfs_wait(fd, &timeout);
12275                 if (sysfs_get_str(sra, NULL, "sync_action",
12276                                   action, sizeof(action)) > 0 &&
12277                                 strncmp(action, "reshape", 7) != 0) {
12278                         if (strncmp(action, "idle", 4) == 0)
12279                                 break;
12280                         close(fd);
12281                         return -1;
12282                 }
12283
12284                 rc = read_completed(fd, &completed);
12285                 if (rc < 0) {
12286                         dprintf("cannot read reshape_position (in loop)\n");
12287                         close(fd);
12288                         return 1;
12289                 } else if (rc == COMPLETED_NONE)
12290                         break;
12291         } while (completed < position_to_set);
12292
12293         close(fd);
12294         return 0;
12295 }
12296
12297 /*******************************************************************************
12298  * Function:    check_degradation_change
12299  * Description: Check that array hasn't become failed.
12300  * Parameters:
12301  *      info    : for sysfs access
12302  *      sources : source disks descriptors
12303  *      degraded: previous degradation level
12304  * Returns:
12305  *      degradation level
12306  ******************************************************************************/
12307 int check_degradation_change(struct mdinfo *info,
12308                              int *sources,
12309                              int degraded)
12310 {
12311         unsigned long long new_degraded;
12312         int rv;
12313
12314         rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
12315         if (rv == -1 || (new_degraded != (unsigned long long)degraded)) {
12316                 /* check each device to ensure it is still working */
12317                 struct mdinfo *sd;
12318                 new_degraded = 0;
12319                 for (sd = info->devs ; sd ; sd = sd->next) {
12320                         if (sd->disk.state & (1<<MD_DISK_FAULTY))
12321                                 continue;
12322                         if (sd->disk.state & (1<<MD_DISK_SYNC)) {
12323                                 char sbuf[SYSFS_MAX_BUF_SIZE];
12324                                 int raid_disk = sd->disk.raid_disk;
12325
12326                                 if (sysfs_get_str(info,
12327                                         sd, "state", sbuf, sizeof(sbuf)) < 0 ||
12328                                         strstr(sbuf, "faulty") ||
12329                                         strstr(sbuf, "in_sync") == NULL) {
12330                                         /* this device is dead */
12331                                         sd->disk.state = (1<<MD_DISK_FAULTY);
12332                                         if (raid_disk >= 0)
12333                                                 close_fd(&sources[raid_disk]);
12334                                         new_degraded++;
12335                                 }
12336                         }
12337                 }
12338         }
12339
12340         return new_degraded;
12341 }
12342
12343 /*******************************************************************************
12344  * Function:    imsm_manage_reshape
12345  * Description: Function finds array under reshape and it manages reshape
12346  *              process. It creates stripes backups (if required) and sets
12347  *              checkpoints.
12348  * Parameters:
12349  *      afd             : Backup handle (nattive) - not used
12350  *      sra             : general array info
12351  *      reshape         : reshape parameters - not used
12352  *      st              : supertype structure
12353  *      blocks          : size of critical section [blocks]
12354  *      fds             : table of source device descriptor
12355  *      offsets         : start of array (offest per devices)
12356  *      dests           : not used
12357  *      destfd          : table of destination device descriptor
12358  *      destoffsets     : table of destination offsets (per device)
12359  * Returns:
12360  *      1 : success, reshape is done
12361  *      0 : fail
12362  ******************************************************************************/
12363 static int imsm_manage_reshape(
12364         int afd, struct mdinfo *sra, struct reshape *reshape,
12365         struct supertype *st, unsigned long backup_blocks,
12366         int *fds, unsigned long long *offsets,
12367         int dests, int *destfd, unsigned long long *destoffsets)
12368 {
12369         int ret_val = 0;
12370         struct intel_super *super = st->sb;
12371         struct intel_dev *dv;
12372         unsigned int sector_size = super->sector_size;
12373         struct imsm_dev *dev = NULL;
12374         struct imsm_map *map_src, *map_dest;
12375         int migr_vol_qan = 0;
12376         int ndata, odata; /* [bytes] */
12377         int chunk; /* [bytes] */
12378         struct migr_record *migr_rec;
12379         char *buf = NULL;
12380         unsigned int buf_size; /* [bytes] */
12381         unsigned long long max_position; /* array size [bytes] */
12382         unsigned long long next_step; /* [blocks]/[bytes] */
12383         unsigned long long old_data_stripe_length;
12384         unsigned long long start_src; /* [bytes] */
12385         unsigned long long start; /* [bytes] */
12386         unsigned long long start_buf_shift; /* [bytes] */
12387         int degraded = 0;
12388         int source_layout = 0;
12389         int subarray_index = -1;
12390
12391         if (!sra)
12392                 return ret_val;
12393
12394         if (!fds || !offsets)
12395                 goto abort;
12396
12397         /* Find volume during the reshape */
12398         for (dv = super->devlist; dv; dv = dv->next) {
12399                 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR &&
12400                     dv->dev->vol.migr_state == 1) {
12401                         dev = dv->dev;
12402                         migr_vol_qan++;
12403                         subarray_index = dv->index;
12404                 }
12405         }
12406         /* Only one volume can migrate at the same time */
12407         if (migr_vol_qan != 1) {
12408                 pr_err("%s", migr_vol_qan ?
12409                         "Number of migrating volumes greater than 1\n" :
12410                         "There is no volume during migrationg\n");
12411                 goto abort;
12412         }
12413
12414         map_dest = get_imsm_map(dev, MAP_0);
12415         map_src = get_imsm_map(dev, MAP_1);
12416         if (map_src == NULL)
12417                 goto abort;
12418
12419         ndata = imsm_num_data_members(map_dest);
12420         odata = imsm_num_data_members(map_src);
12421
12422         chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
12423         old_data_stripe_length = odata * chunk;
12424
12425         migr_rec = super->migr_rec;
12426
12427         /* initialize migration record for start condition */
12428         if (sra->reshape_progress == 0)
12429                 init_migr_record_imsm(st, dev, sra);
12430         else {
12431                 if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) {
12432                         dprintf("imsm: cannot restart migration when data are present in copy area.\n");
12433                         goto abort;
12434                 }
12435                 /* Save checkpoint to update migration record for current
12436                  * reshape position (in md). It can be farther than current
12437                  * reshape position in metadata.
12438                  */
12439                 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
12440                         /* ignore error == 2, this can mean end of reshape here
12441                          */
12442                         dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL, initial save)\n");
12443                         goto abort;
12444                 }
12445         }
12446
12447         /* size for data */
12448         buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
12449         /* extend  buffer size for parity disk */
12450         buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
12451         /* add space for stripe alignment */
12452         buf_size += old_data_stripe_length;
12453         if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) {
12454                 dprintf("imsm: Cannot allocate checkpoint buffer\n");
12455                 goto abort;
12456         }
12457
12458         max_position = sra->component_size * ndata;
12459         source_layout = imsm_level_to_layout(map_src->raid_level);
12460
12461         while (current_migr_unit(migr_rec) <
12462                get_num_migr_units(migr_rec)) {
12463                 /* current reshape position [blocks] */
12464                 unsigned long long current_position =
12465                         __le32_to_cpu(migr_rec->blocks_per_unit)
12466                         * current_migr_unit(migr_rec);
12467                 unsigned long long border;
12468
12469                 /* Check that array hasn't become failed.
12470                  */
12471                 degraded = check_degradation_change(sra, fds, degraded);
12472                 if (degraded > 1) {
12473                         dprintf("imsm: Abort reshape due to degradation level (%i)\n", degraded);
12474                         goto abort;
12475                 }
12476
12477                 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
12478
12479                 if ((current_position + next_step) > max_position)
12480                         next_step = max_position - current_position;
12481
12482                 start = current_position * 512;
12483
12484                 /* align reading start to old geometry */
12485                 start_buf_shift = start % old_data_stripe_length;
12486                 start_src = start - start_buf_shift;
12487
12488                 border = (start_src / odata) - (start / ndata);
12489                 border /= 512;
12490                 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
12491                         /* save critical stripes to buf
12492                          * start     - start address of current unit
12493                          *             to backup [bytes]
12494                          * start_src - start address of current unit
12495                          *             to backup alligned to source array
12496                          *             [bytes]
12497                          */
12498                         unsigned long long next_step_filler;
12499                         unsigned long long copy_length = next_step * 512;
12500
12501                         /* allign copy area length to stripe in old geometry */
12502                         next_step_filler = ((copy_length + start_buf_shift)
12503                                             % old_data_stripe_length);
12504                         if (next_step_filler)
12505                                 next_step_filler = (old_data_stripe_length
12506                                                     - next_step_filler);
12507                         dprintf("save_stripes() parameters: start = %llu,\tstart_src = %llu,\tnext_step*512 = %llu,\tstart_in_buf_shift = %llu,\tnext_step_filler = %llu\n",
12508                                 start, start_src, copy_length,
12509                                 start_buf_shift, next_step_filler);
12510
12511                         if (save_stripes(fds, offsets, map_src->num_members,
12512                                          chunk, map_src->raid_level,
12513                                          source_layout, 0, NULL, start_src,
12514                                          copy_length +
12515                                          next_step_filler + start_buf_shift,
12516                                          buf)) {
12517                                 dprintf("imsm: Cannot save stripes to buffer\n");
12518                                 goto abort;
12519                         }
12520                         /* Convert data to destination format and store it
12521                          * in backup general migration area
12522                          */
12523                         if (save_backup_imsm(st, dev, sra,
12524                                 buf + start_buf_shift, copy_length)) {
12525                                 dprintf("imsm: Cannot save stripes to target devices\n");
12526                                 goto abort;
12527                         }
12528                         if (save_checkpoint_imsm(st, sra,
12529                                                  UNIT_SRC_IN_CP_AREA)) {
12530                                 dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_IN_CP_AREA)\n");
12531                                 goto abort;
12532                         }
12533                 } else {
12534                         /* set next step to use whole border area */
12535                         border /= next_step;
12536                         if (border > 1)
12537                                 next_step *= border;
12538                 }
12539                 /* When data backed up, checkpoint stored,
12540                  * kick the kernel to reshape unit of data
12541                  */
12542                 next_step = next_step + sra->reshape_progress;
12543                 /* limit next step to array max position */
12544                 if (next_step > max_position)
12545                         next_step = max_position;
12546                 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
12547                 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
12548                 sra->reshape_progress = next_step;
12549
12550                 /* wait until reshape finish */
12551                 if (wait_for_reshape_imsm(sra, ndata)) {
12552                         dprintf("wait_for_reshape_imsm returned error!\n");
12553                         goto abort;
12554                 }
12555                 if (sigterm)
12556                         goto abort;
12557
12558                 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
12559                         /* ignore error == 2, this can mean end of reshape here
12560                          */
12561                         dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL)\n");
12562                         goto abort;
12563                 }
12564
12565         }
12566
12567         /* clear migr_rec on disks after successful migration */
12568         struct dl *d;
12569
12570         memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
12571         for (d = super->disks; d; d = d->next) {
12572                 if (d->index < 0 || is_failed(&d->disk))
12573                         continue;
12574                 unsigned long long dsize;
12575
12576                 get_dev_size(d->fd, NULL, &dsize);
12577                 if (lseek64(d->fd, dsize - MIGR_REC_SECTOR_POSITION*sector_size,
12578                             SEEK_SET) >= 0) {
12579                         if ((unsigned int)write(d->fd, super->migr_rec_buf,
12580                             MIGR_REC_BUF_SECTORS*sector_size) !=
12581                             MIGR_REC_BUF_SECTORS*sector_size)
12582                                 perror("Write migr_rec failed");
12583                 }
12584         }
12585
12586         /* return '1' if done */
12587         ret_val = 1;
12588
12589         /* After the reshape eliminate size mismatch in metadata.
12590          * Don't update md/component_size here, volume hasn't
12591          * to take whole space. It is allowed by kernel.
12592          * md/component_size will be set propoperly after next assembly.
12593          */
12594         imsm_fix_size_mismatch(st, subarray_index);
12595
12596 abort:
12597         free(buf);
12598         /* See Grow.c: abort_reshape() for further explanation */
12599         sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
12600         sysfs_set_num(sra, NULL, "suspend_hi", 0);
12601         sysfs_set_num(sra, NULL, "suspend_lo", 0);
12602
12603         return ret_val;
12604 }
12605
/*******************************************************************************
 * Function:    calculate_bitmap_min_chunksize
 * Description: Calculates the minimal valid bitmap chunk size.
 *              Starting from 4096, the chunk size is doubled until the
 *              number of bits needed for the data area fits in max_bits.
 * Parameters:
 *      max_bits        : indicate how many bits can be used for the bitmap;
 *                        0 is treated as 1 so the search always terminates
 *      data_area_size  : the size of the data area covered by the bitmap
 *
 * Returns:
 *       The bitmap chunk size
 ******************************************************************************/
static unsigned long long
calculate_bitmap_min_chunksize(unsigned long long max_bits,
                               unsigned long long data_area_size)
{
        unsigned long long min_chunk =
                4096; /* sub-page chunks don't work yet.. */
        unsigned long long bits = data_area_size / min_chunk + 1;

        /* 'bits' never drops below 1, so a limit of 0 could never be met */
        if (max_bits == 0)
                max_bits = 1;

        while (bits > max_bits) {
                min_chunk *= 2;
                bits = (bits + 1) / 2;
        }
        return min_chunk;
}
12630
12631 /*******************************************************************************
12632  * Function:    calculate_bitmap_chunksize
12633  * Description: Calculates the bitmap chunk size for the given device
12634  * Parameters:
12635  *      st      : supertype information
12636  *      dev     : device for the bitmap
12637  *
12638  * Returns:
12639  *       The bitmap chunk size
12640  ******************************************************************************/
12641 static unsigned long long calculate_bitmap_chunksize(struct supertype *st,
12642                                                      struct imsm_dev *dev)
12643 {
12644         struct intel_super *super = st->sb;
12645         unsigned long long min_chunksize;
12646         unsigned long long result = IMSM_DEFAULT_BITMAP_CHUNKSIZE;
12647         size_t dev_size = imsm_dev_size(dev);
12648
12649         min_chunksize = calculate_bitmap_min_chunksize(
12650                 IMSM_BITMAP_AREA_SIZE * super->sector_size, dev_size);
12651
12652         if (result < min_chunksize)
12653                 result = min_chunksize;
12654
12655         return result;
12656 }
12657
12658 /*******************************************************************************
12659  * Function:    init_bitmap_header
12660  * Description: Initialize the bitmap header structure
12661  * Parameters:
12662  *      st      : supertype information
12663  *      bms     : bitmap header struct to initialize
12664  *      dev     : device for the bitmap
12665  *
12666  * Returns:
12667  *       0 : success
12668  *      -1 : fail
12669  ******************************************************************************/
12670 static int init_bitmap_header(struct supertype *st, struct bitmap_super_s *bms,
12671                               struct imsm_dev *dev)
12672 {
12673         int vol_uuid[4];
12674
12675         if (!bms || !dev)
12676                 return -1;
12677
12678         bms->magic = __cpu_to_le32(BITMAP_MAGIC);
12679         bms->version = __cpu_to_le32(BITMAP_MAJOR_HI);
12680         bms->daemon_sleep = __cpu_to_le32(IMSM_DEFAULT_BITMAP_DAEMON_SLEEP);
12681         bms->sync_size = __cpu_to_le64(IMSM_BITMAP_AREA_SIZE);
12682         bms->write_behind = __cpu_to_le32(0);
12683
12684         uuid_from_super_imsm(st, vol_uuid);
12685         memcpy(bms->uuid, vol_uuid, 16);
12686
12687         bms->chunksize = calculate_bitmap_chunksize(st, dev);
12688
12689         return 0;
12690 }
12691
12692 /*******************************************************************************
12693  * Function:    validate_internal_bitmap_for_drive
12694  * Description: Verify if the bitmap header for a given drive.
12695  * Parameters:
12696  *      st      : supertype information
12697  *      offset  : The offset from the beginning of the drive where to look for
12698  *                the bitmap header.
12699  *      d       : the drive info
12700  *
12701  * Returns:
12702  *       0 : success
12703  *      -1 : fail
12704  ******************************************************************************/
12705 static int validate_internal_bitmap_for_drive(struct supertype *st,
12706                                               unsigned long long offset,
12707                                               struct dl *d)
12708 {
12709         struct intel_super *super = st->sb;
12710         int ret = -1;
12711         int vol_uuid[4];
12712         bitmap_super_t *bms;
12713         int fd;
12714
12715         if (!d)
12716                 return -1;
12717
12718         void *read_buf;
12719
12720         if (posix_memalign(&read_buf, MAX_SECTOR_SIZE, IMSM_BITMAP_HEADER_SIZE))
12721                 return -1;
12722
12723         fd = d->fd;
12724         if (!is_fd_valid(fd)) {
12725                 fd = open(d->devname, O_RDONLY, 0);
12726
12727                 if (!is_fd_valid(fd)) {
12728                         dprintf("cannot open the device %s\n", d->devname);
12729                         goto abort;
12730                 }
12731         }
12732
12733         if (lseek64(fd, offset * super->sector_size, SEEK_SET) < 0)
12734                 goto abort;
12735         if (read(fd, read_buf, IMSM_BITMAP_HEADER_SIZE) !=
12736             IMSM_BITMAP_HEADER_SIZE)
12737                 goto abort;
12738
12739         uuid_from_super_imsm(st, vol_uuid);
12740
12741         bms = read_buf;
12742         if ((bms->magic != __cpu_to_le32(BITMAP_MAGIC)) ||
12743             (bms->version != __cpu_to_le32(BITMAP_MAJOR_HI)) ||
12744             (!same_uuid((int *)bms->uuid, vol_uuid, st->ss->swapuuid))) {
12745                 dprintf("wrong bitmap header detected\n");
12746                 goto abort;
12747         }
12748
12749         ret = 0;
12750 abort:
12751         if (!is_fd_valid(d->fd))
12752                 close_fd(&fd);
12753
12754         if (read_buf)
12755                 free(read_buf);
12756
12757         return ret;
12758 }
12759
12760 /*******************************************************************************
12761  * Function:    validate_internal_bitmap_imsm
12762  * Description: Verify if the bitmap header is in place and with proper data.
12763  * Parameters:
12764  *      st      : supertype information
12765  *
12766  * Returns:
12767  *       0 : success or device w/o RWH_BITMAP
12768  *      -1 : fail
12769  ******************************************************************************/
12770 static int validate_internal_bitmap_imsm(struct supertype *st)
12771 {
12772         struct intel_super *super = st->sb;
12773         struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
12774         unsigned long long offset;
12775         struct dl *d;
12776
12777         if (dev->rwh_policy != RWH_BITMAP)
12778                 return 0;
12779
12780         offset = get_bitmap_header_sector(super, super->current_vol);
12781         for (d = super->disks; d; d = d->next) {
12782                 if (d->index < 0 || is_failed(&d->disk))
12783                         continue;
12784
12785                 if (validate_internal_bitmap_for_drive(st, offset, d)) {
12786                         pr_err("imsm: bitmap validation failed\n");
12787                         return -1;
12788                 }
12789         }
12790         return 0;
12791 }
12792
12793 /*******************************************************************************
12794  * Function:    add_internal_bitmap_imsm
12795  * Description: Mark the volume to use the bitmap and updates the chunk size value.
12796  * Parameters:
12797  *      st              : supertype information
12798  *      chunkp          : bitmap chunk size
12799  *      delay           : not used for imsm
12800  *      write_behind    : not used for imsm
12801  *      size            : not used for imsm
12802  *      may_change      : not used for imsm
12803  *      amajor          : not used for imsm
12804  *
12805  * Returns:
12806  *       0 : success
12807  *      -1 : fail
12808  ******************************************************************************/
12809 static int add_internal_bitmap_imsm(struct supertype *st, int *chunkp,
12810                                     int delay, int write_behind,
12811                                     unsigned long long size, int may_change,
12812                                     int amajor)
12813 {
12814         struct intel_super *super = st->sb;
12815         int vol_idx = super->current_vol;
12816         struct imsm_dev *dev;
12817
12818         if (!super->devlist || vol_idx == -1 || !chunkp)
12819                 return -1;
12820
12821         dev = get_imsm_dev(super, vol_idx);
12822         dev->rwh_policy = RWH_BITMAP;
12823         *chunkp = calculate_bitmap_chunksize(st, dev);
12824         return 0;
12825 }
12826
12827 /*******************************************************************************
12828  * Function:    locate_bitmap_imsm
12829  * Description: Seek 'fd' to start of write-intent-bitmap.
12830  * Parameters:
12831  *      st              : supertype information
12832  *      fd              : file descriptor for the device
12833  *      node_num        : not used for imsm
12834  *
12835  * Returns:
12836  *       0 : success
12837  *      -1 : fail
12838  ******************************************************************************/
12839 static int locate_bitmap_imsm(struct supertype *st, int fd, int node_num)
12840 {
12841         struct intel_super *super = st->sb;
12842         unsigned long long offset;
12843         int vol_idx = super->current_vol;
12844
12845         if (!super->devlist || vol_idx == -1)
12846                 return -1;
12847
12848         offset = get_bitmap_header_sector(super, super->current_vol);
12849         dprintf("bitmap header offset is %llu\n", offset);
12850
12851         lseek64(fd, offset << 9, 0);
12852
12853         return 0;
12854 }
12855
12856 /*******************************************************************************
12857  * Function:    write_init_bitmap_imsm
12858  * Description: Write a bitmap header and prepares the area for the bitmap.
12859  * Parameters:
12860  *      st      : supertype information
12861  *      fd      : file descriptor for the device
12862  *      update  : not used for imsm
12863  *
12864  * Returns:
12865  *       0 : success
12866  *      -1 : fail
12867  ******************************************************************************/
12868 static int write_init_bitmap_imsm(struct supertype *st, int fd,
12869                                   enum bitmap_update update)
12870 {
12871         struct intel_super *super = st->sb;
12872         int vol_idx = super->current_vol;
12873         int ret = 0;
12874         unsigned long long offset;
12875         bitmap_super_t bms = { 0 };
12876         size_t written = 0;
12877         size_t to_write;
12878         ssize_t rv_num;
12879         void *buf;
12880
12881         if (!super->devlist || !super->sector_size || vol_idx == -1)
12882                 return -1;
12883
12884         struct imsm_dev *dev = get_imsm_dev(super, vol_idx);
12885
12886         /* first clear the space for bitmap header */
12887         unsigned long long bitmap_area_start =
12888                 get_bitmap_header_sector(super, vol_idx);
12889
12890         dprintf("zeroing area start (%llu) and size (%u)\n", bitmap_area_start,
12891                 IMSM_BITMAP_AND_HEADER_SIZE / super->sector_size);
12892         if (zero_disk_range(fd, bitmap_area_start,
12893                             IMSM_BITMAP_HEADER_SIZE / super->sector_size)) {
12894                 pr_err("imsm: cannot zeroing the space for the bitmap\n");
12895                 return -1;
12896         }
12897
12898         /* The bitmap area should be filled with "1"s to perform initial
12899          * synchronization.
12900          */
12901         if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE))
12902                 return -1;
12903         memset(buf, 0xFF, MAX_SECTOR_SIZE);
12904         offset = get_bitmap_sector(super, vol_idx);
12905         lseek64(fd, offset << 9, 0);
12906         while (written < IMSM_BITMAP_AREA_SIZE) {
12907                 to_write = IMSM_BITMAP_AREA_SIZE - written;
12908                 if (to_write > MAX_SECTOR_SIZE)
12909                         to_write = MAX_SECTOR_SIZE;
12910                 rv_num = write(fd, buf, MAX_SECTOR_SIZE);
12911                 if (rv_num != MAX_SECTOR_SIZE) {
12912                         ret = -1;
12913                         dprintf("cannot initialize bitmap area\n");
12914                         goto abort;
12915                 }
12916                 written += rv_num;
12917         }
12918
12919         /* write a bitmap header */
12920         init_bitmap_header(st, &bms, dev);
12921         memset(buf, 0, MAX_SECTOR_SIZE);
12922         memcpy(buf, &bms, sizeof(bitmap_super_t));
12923         if (locate_bitmap_imsm(st, fd, 0)) {
12924                 ret = -1;
12925                 dprintf("cannot locate the bitmap\n");
12926                 goto abort;
12927         }
12928         if (write(fd, buf, MAX_SECTOR_SIZE) != MAX_SECTOR_SIZE) {
12929                 ret = -1;
12930                 dprintf("cannot write the bitmap header\n");
12931                 goto abort;
12932         }
12933         fsync(fd);
12934
12935 abort:
12936         free(buf);
12937
12938         return ret;
12939 }
12940
12941 /*******************************************************************************
12942  * Function:    is_vol_to_setup_bitmap
12943  * Description: Checks if a bitmap should be activated on the dev.
12944  * Parameters:
12945  *      info    : info about the volume to setup the bitmap
12946  *      dev     : the device to check against bitmap creation
12947  *
12948  * Returns:
12949  *       0 : bitmap should be set up on the device
12950  *      -1 : otherwise
12951  ******************************************************************************/
12952 static int is_vol_to_setup_bitmap(struct mdinfo *info, struct imsm_dev *dev)
12953 {
12954         if (!dev || !info)
12955                 return -1;
12956
12957         if ((strcmp((char *)dev->volume, info->name) == 0) &&
12958             (dev->rwh_policy == RWH_BITMAP))
12959                 return -1;
12960
12961         return 0;
12962 }
12963
12964 /*******************************************************************************
12965  * Function:    set_bitmap_sysfs
12966  * Description: Set the sysfs atributes of a given volume to activate the bitmap.
12967  * Parameters:
12968  *      info            : info about the volume where the bitmap should be setup
12969  *      chunksize       : bitmap chunk size
12970  *      location        : location of the bitmap
12971  *
12972  * Returns:
12973  *       0 : success
12974  *      -1 : fail
12975  ******************************************************************************/
12976 static int set_bitmap_sysfs(struct mdinfo *info, unsigned long long chunksize,
12977                             char *location)
12978 {
12979         /* The bitmap/metadata is set to external to allow changing of value for
12980          * bitmap/location. When external is used, the kernel will treat an offset
12981          * related to the device's first lba (in opposition to the "internal" case
12982          * when this value is related to the beginning of the superblock).
12983          */
12984         if (sysfs_set_str(info, NULL, "bitmap/metadata", "external")) {
12985                 dprintf("failed to set bitmap/metadata\n");
12986                 return -1;
12987         }
12988
12989         /* It can only be changed when no bitmap is active.
12990          * Should be bigger than 512 and must be power of 2.
12991          * It is expecting the value in bytes.
12992          */
12993         if (sysfs_set_num(info, NULL, "bitmap/chunksize",
12994                                           __cpu_to_le32(chunksize))) {
12995                 dprintf("failed to set bitmap/chunksize\n");
12996                 return -1;
12997         }
12998
12999         /* It is expecting the value in sectors. */
13000         if (sysfs_set_num(info, NULL, "bitmap/space",
13001                                           __cpu_to_le64(IMSM_BITMAP_AREA_SIZE))) {
13002                 dprintf("failed to set bitmap/space\n");
13003                 return -1;
13004         }
13005
13006         /* Determines the delay between the bitmap updates.
13007          * It is expecting the value in seconds.
13008          */
13009         if (sysfs_set_num(info, NULL, "bitmap/time_base",
13010                                           __cpu_to_le64(IMSM_DEFAULT_BITMAP_DAEMON_SLEEP))) {
13011                 dprintf("failed to set bitmap/time_base\n");
13012                 return -1;
13013         }
13014
13015         /* It is expecting the value in sectors with a sign at the beginning. */
13016         if (sysfs_set_str(info, NULL, "bitmap/location", location)) {
13017                 dprintf("failed to set bitmap/location\n");
13018                 return -1;
13019         }
13020
13021         return 0;
13022 }
13023
13024 /*******************************************************************************
13025  * Function:    set_bitmap_imsm
13026  * Description: Setup the bitmap for the given volume
13027  * Parameters:
13028  *      st      : supertype information
13029  *      info    : info about the volume where the bitmap should be setup
13030  *
13031  * Returns:
13032  *       0 : success
13033  *      -1 : fail
13034  ******************************************************************************/
13035 static int set_bitmap_imsm(struct supertype *st, struct mdinfo *info)
13036 {
13037         struct intel_super *super = st->sb;
13038         int prev_current_vol = super->current_vol;
13039         struct imsm_dev *dev;
13040         int ret = -1;
13041         char location[16] = "";
13042         unsigned long long chunksize;
13043         struct intel_dev *dev_it;
13044
13045         for (dev_it = super->devlist; dev_it; dev_it = dev_it->next) {
13046                 super->current_vol = dev_it->index;
13047                 dev = get_imsm_dev(super, super->current_vol);
13048
13049                 if (is_vol_to_setup_bitmap(info, dev)) {
13050                         if (validate_internal_bitmap_imsm(st)) {
13051                                 dprintf("bitmap header validation failed\n");
13052                                 goto abort;
13053                         }
13054
13055                         chunksize = calculate_bitmap_chunksize(st, dev);
13056                         dprintf("chunk size is %llu\n", chunksize);
13057
13058                         snprintf(location, sizeof(location), "+%llu",
13059                                  get_bitmap_sector(super, super->current_vol));
13060                         dprintf("bitmap offset is %s\n", location);
13061
13062                         if (set_bitmap_sysfs(info, chunksize, location)) {
13063                                 dprintf("cannot setup the bitmap\n");
13064                                 goto abort;
13065                         }
13066                 }
13067         }
13068         ret = 0;
13069 abort:
13070         super->current_vol = prev_current_vol;
13071         return ret;
13072 }
13073
13074 struct superswitch super_imsm = {
13075         .examine_super  = examine_super_imsm,
13076         .brief_examine_super = brief_examine_super_imsm,
13077         .brief_examine_subarrays = brief_examine_subarrays_imsm,
13078         .export_examine_super = export_examine_super_imsm,
13079         .detail_super   = detail_super_imsm,
13080         .brief_detail_super = brief_detail_super_imsm,
13081         .write_init_super = write_init_super_imsm,
13082         .validate_geometry = validate_geometry_imsm,
13083         .add_to_super   = add_to_super_imsm,
13084         .remove_from_super = remove_from_super_imsm,
13085         .detail_platform = detail_platform_imsm,
13086         .export_detail_platform = export_detail_platform_imsm,
13087         .kill_subarray = kill_subarray_imsm,
13088         .update_subarray = update_subarray_imsm,
13089         .load_container = load_container_imsm,
13090         .default_geometry = default_geometry_imsm,
13091         .test_and_add_drive_policies = test_and_add_drive_policies_imsm,
13092         .reshape_super  = imsm_reshape_super,
13093         .manage_reshape = imsm_manage_reshape,
13094         .recover_backup = recover_backup_imsm,
13095         .examine_badblocks = examine_badblocks_imsm,
13096         .match_home     = match_home_imsm,
13097         .uuid_from_super= uuid_from_super_imsm,
13098         .getinfo_super  = getinfo_super_imsm,
13099         .getinfo_super_disks = getinfo_super_disks_imsm,
13100         .update_super   = update_super_imsm,
13101
13102         .avail_size     = avail_size_imsm,
13103         .get_spare_criteria = get_spare_criteria_imsm,
13104
13105         .compare_super  = compare_super_imsm,
13106
13107         .load_super     = load_super_imsm,
13108         .init_super     = init_super_imsm,
13109         .store_super    = store_super_imsm,
13110         .free_super     = free_super_imsm,
13111         .match_metadata_desc = match_metadata_desc_imsm,
13112         .container_content = container_content_imsm,
13113         .validate_container = validate_container_imsm,
13114
13115         .add_internal_bitmap = add_internal_bitmap_imsm,
13116         .locate_bitmap = locate_bitmap_imsm,
13117         .write_bitmap = write_init_bitmap_imsm,
13118         .set_bitmap = set_bitmap_imsm,
13119
13120         .write_init_ppl = write_init_ppl_imsm,
13121         .validate_ppl   = validate_ppl_imsm,
13122
13123         .external       = 1,
13124         .swapuuid       = 0,
13125         .name = "imsm",
13126
13127 /* for mdmon */
13128         .open_new       = imsm_open_new,
13129         .set_array_state= imsm_set_array_state,
13130         .set_disk       = imsm_set_disk,
13131         .sync_metadata  = imsm_sync_metadata,
13132         .activate_spare = imsm_activate_spare,
13133         .process_update = imsm_process_update,
13134         .prepare_update = imsm_prepare_update,
13135         .record_bad_block = imsm_record_badblock,
13136         .clear_bad_block  = imsm_clear_badblock,
13137         .get_bad_blocks   = imsm_get_badblocks,
13138 };