super-intel.c

   1 /*
   2  * mdadm - Intel(R) Matrix Storage Manager Support
   3  *
   4  * Copyright (C) 2002-2008 Intel Corporation
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along with
  16  * this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  18  */
  19
  20 #define HAVE_STDINT_H 1
  21 #include "mdadm.h"
  22 #include "mdmon.h"
  23 #include "sha1.h"
  24 #include "platform-intel.h"
  25 #include <values.h>
  26 #include <scsi/sg.h>
  27 #include <ctype.h>
  28 #include <dirent.h>
  29
  30 /* MPB == Metadata Parameter Block */
  31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
  32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
  33 #define MPB_VERSION_RAID0 "1.0.00"
  34 #define MPB_VERSION_RAID1 "1.1.00"
  35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
  36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
  37 #define MPB_VERSION_RAID5 "1.2.02"
  38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
  39 #define MPB_VERSION_CNG "1.2.06"
  40 #define MPB_VERSION_ATTRIBS "1.3.00"
  41 #define MAX_SIGNATURE_LENGTH  32
  42 #define MAX_RAID_SERIAL_LEN   16
  43
  44 /* supports RAID0 */
  45 #define MPB_ATTRIB_RAID0                __cpu_to_le32(0x00000001)
  46 /* supports RAID1 */
  47 #define MPB_ATTRIB_RAID1                __cpu_to_le32(0x00000002)
  48 /* supports RAID10 */
  49 #define MPB_ATTRIB_RAID10               __cpu_to_le32(0x00000004)
  50 /* supports RAID1E */
  51 #define MPB_ATTRIB_RAID1E               __cpu_to_le32(0x00000008)
  52 /* supports RAID5 */
  53 #define MPB_ATTRIB_RAID5                __cpu_to_le32(0x00000010)
  54 /* supports RAID CNG */
  55 #define MPB_ATTRIB_RAIDCNG              __cpu_to_le32(0x00000020)
  56 /* supports expanded stripe sizes of  256K, 512K and 1MB */
  57 #define MPB_ATTRIB_EXP_STRIPE_SIZE      __cpu_to_le32(0x00000040)
  58
  59 /* The OROM Support RST Caching of Volumes */
  60 #define MPB_ATTRIB_NVM                  __cpu_to_le32(0x02000000)
  61 /* The OROM supports creating disks greater than 2TB */
  62 #define MPB_ATTRIB_2TB_DISK             __cpu_to_le32(0x04000000)
  63 /* The OROM supports Bad Block Management */
  64 #define MPB_ATTRIB_BBM                  __cpu_to_le32(0x08000000)
  65
  66 /* THe OROM Supports NVM Caching of Volumes */
  67 #define MPB_ATTRIB_NEVER_USE2           __cpu_to_le32(0x10000000)
  68 /* The OROM supports creating volumes greater than 2TB */
  69 #define MPB_ATTRIB_2TB                  __cpu_to_le32(0x20000000)
  70 /* originally for PMP, now it's wasted b/c. Never use this bit! */
  71 #define MPB_ATTRIB_NEVER_USE            __cpu_to_le32(0x40000000)
  72 /* Verify MPB contents against checksum after reading MPB */
  73 #define MPB_ATTRIB_CHECKSUM_VERIFY      __cpu_to_le32(0x80000000)
  74
  75 /* Define all supported attributes that have to be accepted by mdadm
  76  */
  77 #define MPB_ATTRIB_SUPPORTED           (MPB_ATTRIB_CHECKSUM_VERIFY | \
  78                                         MPB_ATTRIB_2TB             | \
  79                                         MPB_ATTRIB_2TB_DISK        | \
  80                                         MPB_ATTRIB_RAID0           | \
  81                                         MPB_ATTRIB_RAID1           | \
  82                                         MPB_ATTRIB_RAID10          | \
  83                                         MPB_ATTRIB_RAID5           | \
  84                                         MPB_ATTRIB_EXP_STRIPE_SIZE)
  85
  86 /* Define attributes that are unused but not harmful */
  87 #define MPB_ATTRIB_IGNORED              (MPB_ATTRIB_NEVER_USE)
  88
  89 #define MPB_SECTOR_CNT 2210
  90 #define IMSM_RESERVED_SECTORS 4096
  91 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
  92 #define SECT_PER_MB_SHIFT 11
  93
  94 /* Disk configuration info. */
  95 #define IMSM_MAX_DEVICES 255
  96 struct imsm_disk {
  97         __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
  98         __u32 total_blocks_lo;           /* 0xE8 - 0xEB total blocks lo */
  99         __u32 scsi_id;                   /* 0xEC - 0xEF scsi ID */
 100 #define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
 101 #define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
 102 #define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
 103         __u32 status;                    /* 0xF0 - 0xF3 */
 104         __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
 105         __u32 total_blocks_hi;           /* 0xF4 - 0xF5 total blocks hi */
 106 #define IMSM_DISK_FILLERS       3
 107         __u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */
 108 };
 109
 110 /* map selector for map managment
 111  */
 112 #define MAP_0           0
 113 #define MAP_1           1
 114 #define MAP_X           -1
 115
 116 /* RAID map configuration infos. */
 117 struct imsm_map {
 118         __u32 pba_of_lba0_lo;   /* start address of partition */
 119         __u32 blocks_per_member_lo;/* blocks per member */
 120         __u32 num_data_stripes_lo;      /* number of data stripes */
 121         __u16 blocks_per_strip;
 122         __u8  map_state;        /* Normal, Uninitialized, Degraded, Failed */
 123 #define IMSM_T_STATE_NORMAL 0
 124 #define IMSM_T_STATE_UNINITIALIZED 1
 125 #define IMSM_T_STATE_DEGRADED 2
 126 #define IMSM_T_STATE_FAILED 3
 127         __u8  raid_level;
 128 #define IMSM_T_RAID0 0
 129 #define IMSM_T_RAID1 1
 130 #define IMSM_T_RAID5 5          /* since metadata version 1.2.02 ? */
 131         __u8  num_members;      /* number of member disks */
 132         __u8  num_domains;      /* number of parity domains */
 133         __u8  failed_disk_num;  /* valid only when state is degraded */
 134         __u8  ddf;
 135         __u32 pba_of_lba0_hi;
 136         __u32 blocks_per_member_hi;
 137         __u32 num_data_stripes_hi;
 138         __u32 filler[4];        /* expansion area */
 139 #define IMSM_ORD_REBUILD (1 << 24)
 140         __u32 disk_ord_tbl[1];  /* disk_ord_tbl[num_members],
 141                                  * top byte contains some flags
 142                                  */
 143 } __attribute__ ((packed));
 144
 145 struct imsm_vol {
 146         __u32 curr_migr_unit;
 147         __u32 checkpoint_id;    /* id to access curr_migr_unit */
 148         __u8  migr_state;       /* Normal or Migrating */
 149 #define MIGR_INIT 0
 150 #define MIGR_REBUILD 1
 151 #define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
 152 #define MIGR_GEN_MIGR 3
 153 #define MIGR_STATE_CHANGE 4
 154 #define MIGR_REPAIR 5
 155         __u8  migr_type;        /* Initializing, Rebuilding, ... */
 156         __u8  dirty;
 157         __u8  fs_state;         /* fast-sync state for CnG (0xff == disabled) */
 158         __u16 verify_errors;    /* number of mismatches */
 159         __u16 bad_blocks;       /* number of bad blocks during verify */
 160         __u32 filler[4];
 161         struct imsm_map map[1];
 162         /* here comes another one if migr_state */
 163 } __attribute__ ((packed));
 164
 165 struct imsm_dev {
 166         __u8  volume[MAX_RAID_SERIAL_LEN];
 167         __u32 size_low;
 168         __u32 size_high;
 169 #define DEV_BOOTABLE            __cpu_to_le32(0x01)
 170 #define DEV_BOOT_DEVICE         __cpu_to_le32(0x02)
 171 #define DEV_READ_COALESCING     __cpu_to_le32(0x04)
 172 #define DEV_WRITE_COALESCING    __cpu_to_le32(0x08)
 173 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
 174 #define DEV_HIDDEN_AT_BOOT      __cpu_to_le32(0x20)
 175 #define DEV_CURRENTLY_HIDDEN    __cpu_to_le32(0x40)
 176 #define DEV_VERIFY_AND_FIX      __cpu_to_le32(0x80)
 177 #define DEV_MAP_STATE_UNINIT    __cpu_to_le32(0x100)
 178 #define DEV_NO_AUTO_RECOVERY    __cpu_to_le32(0x200)
 179 #define DEV_CLONE_N_GO          __cpu_to_le32(0x400)
 180 #define DEV_CLONE_MAN_SYNC      __cpu_to_le32(0x800)
 181 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
 182         __u32 status;   /* Persistent RaidDev status */
 183         __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
 184         __u8  migr_priority;
 185         __u8  num_sub_vols;
 186         __u8  tid;
 187         __u8  cng_master_disk;
 188         __u16 cache_policy;
 189         __u8  cng_state;
 190         __u8  cng_sub_state;
 191 #define IMSM_DEV_FILLERS 10
 192         __u32 filler[IMSM_DEV_FILLERS];
 193         struct imsm_vol vol;
 194 } __attribute__ ((packed));
 195
 196 struct imsm_super {
 197         __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
 198         __u32 check_sum;                /* 0x20 - 0x23 MPB Checksum */
 199         __u32 mpb_size;                 /* 0x24 - 0x27 Size of MPB */
 200         __u32 family_num;               /* 0x28 - 0x2B Checksum from first time this config was written */
 201         __u32 generation_num;           /* 0x2C - 0x2F Incremented each time this array's MPB is written */
 202         __u32 error_log_size;           /* 0x30 - 0x33 in bytes */
 203         __u32 attributes;               /* 0x34 - 0x37 */
 204         __u8 num_disks;                 /* 0x38 Number of configured disks */
 205         __u8 num_raid_devs;             /* 0x39 Number of configured volumes */
 206         __u8 error_log_pos;             /* 0x3A  */
 207         __u8 fill[1];                   /* 0x3B */
 208         __u32 cache_size;               /* 0x3c - 0x40 in mb */
 209         __u32 orig_family_num;          /* 0x40 - 0x43 original family num */
 210         __u32 pwr_cycle_count;          /* 0x44 - 0x47 simulated power cycle count for array */
 211         __u32 bbm_log_size;             /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
 212 #define IMSM_FILLERS 35
 213         __u32 filler[IMSM_FILLERS];     /* 0x4C - 0xD7 RAID_MPB_FILLERS */
 214         struct imsm_disk disk[1];       /* 0xD8 diskTbl[numDisks] */
 215         /* here comes imsm_dev[num_raid_devs] */
 216         /* here comes BBM logs */
 217 } __attribute__ ((packed));
 218
 219 #define BBM_LOG_MAX_ENTRIES 254
 220
 221 struct bbm_log_entry {
 222         __u64 defective_block_start;
 223 #define UNREADABLE 0xFFFFFFFF
 224         __u32 spare_block_offset;
 225         __u16 remapped_marked_count;
 226         __u16 disk_ordinal;
 227 } __attribute__ ((__packed__));
 228
 229 struct bbm_log {
 230         __u32 signature; /* 0xABADB10C */
 231         __u32 entry_count;
 232         __u32 reserved_spare_block_count; /* 0 */
 233         __u32 reserved; /* 0xFFFF */
 234         __u64 first_spare_lba;
 235         struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
 236 } __attribute__ ((__packed__));
 237
 238
 239 #ifndef MDASSEMBLE
 240 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
 241 #endif
 242
 243 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
 244
 245 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
 246
 247 #define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */
 248 #define MIGR_REC_POSITION 512 /* migr_record position offset on disk,
 249                                * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION
 250                                */
 251
 252
 253 #define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
 254                                  *  be recovered using srcMap */
 255 #define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
 256                                  *  already been migrated and must
 257                                  *  be recovered from checkpoint area */
 258 struct migr_record {
 259         __u32 rec_status;           /* Status used to determine how to restart
 260                                      * migration in case it aborts
 261                                      * in some fashion */
 262         __u32 curr_migr_unit;       /* 0..numMigrUnits-1 */
 263         __u32 family_num;           /* Family number of MPB
 264                                      * containing the RaidDev
 265                                      * that is migrating */
 266         __u32 ascending_migr;       /* True if migrating in increasing
 267                                      * order of lbas */
 268         __u32 blocks_per_unit;      /* Num disk blocks per unit of operation */
 269         __u32 dest_depth_per_unit;  /* Num member blocks each destMap
 270                                      * member disk
 271                                      * advances per unit-of-operation */
 272         __u32 ckpt_area_pba;        /* Pba of first block of ckpt copy area */
 273         __u32 dest_1st_member_lba;  /* First member lba on first
 274                                      * stripe of destination */
 275         __u32 num_migr_units;       /* Total num migration units-of-op */
 276         __u32 post_migr_vol_cap;    /* Size of volume after
 277                                      * migration completes */
 278         __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
 279         __u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
 280                                      * migration ckpt record was read from
 281                                      * (for recovered migrations) */
 282 } __attribute__ ((__packed__));
 283
 284 struct md_list {
 285         /* usage marker:
 286          *  1: load metadata
 287          *  2: metadata does not match
 288          *  4: already checked
 289          */
 290         int   used;
 291         char  *devname;
 292         int   found;
 293         int   container;
 294         dev_t st_rdev;
 295         struct md_list *next;
 296 };
 297
 298 #define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
 299
 300 static __u8 migr_type(struct imsm_dev *dev)
 301 {
 302         if (dev->vol.migr_type == MIGR_VERIFY &&
 303             dev->status & DEV_VERIFY_AND_FIX)
 304                 return MIGR_REPAIR;
 305         else
 306                 return dev->vol.migr_type;
 307 }
 308
 309 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
 310 {
 311         /* for compatibility with older oroms convert MIGR_REPAIR, into
 312          * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
 313          */
 314         if (migr_type == MIGR_REPAIR) {
 315                 dev->vol.migr_type = MIGR_VERIFY;
 316                 dev->status |= DEV_VERIFY_AND_FIX;
 317         } else {
 318                 dev->vol.migr_type = migr_type;
 319                 dev->status &= ~DEV_VERIFY_AND_FIX;
 320         }
 321 }
 322
 323 static unsigned int sector_count(__u32 bytes)
 324 {
 325         return ROUND_UP(bytes, 512) / 512;
 326 }
 327
 328 static unsigned int mpb_sectors(struct imsm_super *mpb)
 329 {
 330         return sector_count(__le32_to_cpu(mpb->mpb_size));
 331 }
 332
 333 struct intel_dev {
 334         struct imsm_dev *dev;
 335         struct intel_dev *next;
 336         unsigned index;
 337 };
 338
 339 struct intel_hba {
 340         enum sys_dev_type type;
 341         char *path;
 342         char *pci_id;
 343         struct intel_hba *next;
 344 };
 345
 346 enum action {
 347         DISK_REMOVE = 1,
 348         DISK_ADD
 349 };
 350 /* internal representation of IMSM metadata */
 351 struct intel_super {
 352         union {
 353                 void *buf; /* O_DIRECT buffer for reading/writing metadata */
 354                 struct imsm_super *anchor; /* immovable parameters */
 355         };
 356         union {
 357                 void *migr_rec_buf; /* buffer for I/O operations */
 358                 struct migr_record *migr_rec; /* migration record */
 359         };
 360         int clean_migration_record_by_mdmon; /* when reshape is switched to next
 361                 array, it indicates that mdmon is allowed to clean migration
 362                 record */
 363         size_t len; /* size of the 'buf' allocation */
 364         void *next_buf; /* for realloc'ing buf from the manager */
 365         size_t next_len;
 366         int updates_pending; /* count of pending updates for mdmon */
 367         int current_vol; /* index of raid device undergoing creation */
 368         unsigned long long create_offset; /* common start for 'current_vol' */
 369         __u32 random; /* random data for seeding new family numbers */
 370         struct intel_dev *devlist;
 371         struct dl {
 372                 struct dl *next;
 373                 int index;
 374                 __u8 serial[MAX_RAID_SERIAL_LEN];
 375                 int major, minor;
 376                 char *devname;
 377                 struct imsm_disk disk;
 378                 int fd;
 379                 int extent_cnt;
 380                 struct extent *e; /* for determining freespace @ create */
 381                 int raiddisk; /* slot to fill in autolayout */
 382                 enum action action;
 383         } *disks, *current_disk;
 384         struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
 385                                       active */
 386         struct dl *missing; /* disks removed while we weren't looking */
 387         struct bbm_log *bbm_log;
 388         struct intel_hba *hba; /* device path of the raid controller for this metadata */
 389         const struct imsm_orom *orom; /* platform firmware support */
 390         struct intel_super *next; /* (temp) list for disambiguating family_num */
 391 };
 392
 393 struct intel_disk {
 394         struct imsm_disk disk;
 395         #define IMSM_UNKNOWN_OWNER (-1)
 396         int owner;
 397         struct intel_disk *next;
 398 };
 399
 400 struct extent {
 401         unsigned long long start, size;
 402 };
 403
 404 /* definitions of reshape process types */
 405 enum imsm_reshape_type {
 406         CH_TAKEOVER,
 407         CH_MIGRATION,
 408         CH_ARRAY_SIZE,
 409 };
 410
 411 /* definition of messages passed to imsm_process_update */
 412 enum imsm_update_type {
 413         update_activate_spare,
 414         update_create_array,
 415         update_kill_array,
 416         update_rename_array,
 417         update_add_remove_disk,
 418         update_reshape_container_disks,
 419         update_reshape_migration,
 420         update_takeover,
 421         update_general_migration_checkpoint,
 422         update_size_change,
 423 };
 424
 425 struct imsm_update_activate_spare {
 426         enum imsm_update_type type;
 427         struct dl *dl;
 428         int slot;
 429         int array;
 430         struct imsm_update_activate_spare *next;
 431 };
 432
 433 struct geo_params {
 434         char devnm[32];
 435         char *dev_name;
 436         unsigned long long size;
 437         int level;
 438         int layout;
 439         int chunksize;
 440         int raid_disks;
 441 };
 442
 443 enum takeover_direction {
 444         R10_TO_R0,
 445         R0_TO_R10
 446 };
 447 struct imsm_update_takeover {
 448         enum imsm_update_type type;
 449         int subarray;
 450         enum takeover_direction direction;
 451 };
 452
 453 struct imsm_update_reshape {
 454         enum imsm_update_type type;
 455         int old_raid_disks;
 456         int new_raid_disks;
 457
 458         int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
 459 };
 460
 461 struct imsm_update_reshape_migration {
 462         enum imsm_update_type type;
 463         int old_raid_disks;
 464         int new_raid_disks;
 465         /* fields for array migration changes
 466          */
 467         int subdev;
 468         int new_level;
 469         int new_layout;
 470         int new_chunksize;
 471
 472         int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
 473 };
 474
 475 struct imsm_update_size_change {
 476         enum imsm_update_type type;
 477         int subdev;
 478         long long new_size;
 479 };
 480
 481 struct imsm_update_general_migration_checkpoint {
 482         enum imsm_update_type type;
 483         __u32 curr_migr_unit;
 484 };
 485
 486 struct disk_info {
 487         __u8 serial[MAX_RAID_SERIAL_LEN];
 488 };
 489
 490 struct imsm_update_create_array {
 491         enum imsm_update_type type;
 492         int dev_idx;
 493         struct imsm_dev dev;
 494 };
 495
 496 struct imsm_update_kill_array {
 497         enum imsm_update_type type;
 498         int dev_idx;
 499 };
 500
 501 struct imsm_update_rename_array {
 502         enum imsm_update_type type;
 503         __u8 name[MAX_RAID_SERIAL_LEN];
 504         int dev_idx;
 505 };
 506
 507 struct imsm_update_add_remove_disk {
 508         enum imsm_update_type type;
 509 };
 510
 511
 512 static const char *_sys_dev_type[] = {
 513         [SYS_DEV_UNKNOWN] = "Unknown",
 514         [SYS_DEV_SAS] = "SAS",
 515         [SYS_DEV_SATA] = "SATA"
 516 };
 517
 518 const char *get_sys_dev_type(enum sys_dev_type type)
 519 {
 520         if (type >= SYS_DEV_MAX)
 521                 type = SYS_DEV_UNKNOWN;
 522
 523         return _sys_dev_type[type];
 524 }
 525
 526 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
 527 {
 528         struct intel_hba *result = xmalloc(sizeof(*result));
 529
 530         result->type = device->type;
 531         result->path = xstrdup(device->path);
 532         result->next = NULL;
 533         if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
 534                 result->pci_id++;
 535
 536         return result;
 537 }
 538
 539 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
 540 {
 541         struct intel_hba *result=NULL;
 542         for (result = hba; result; result = result->next) {
 543                 if (result->type == device->type && strcmp(result->path, device->path) == 0)
 544                         break;
 545         }
 546         return result;
 547 }
 548
 549 static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
 550 {
 551         struct intel_hba *hba;
 552
 553         /* check if disk attached to Intel HBA */
 554         hba = find_intel_hba(super->hba, device);
 555         if (hba != NULL)
 556                 return 1;
 557         /* Check if HBA is already attached to super */
 558         if (super->hba == NULL) {
 559                 super->hba = alloc_intel_hba(device);
 560                 return 1;
 561         } else
 562                 /* IMSM metadata disallows to attach disks to multiple
 563                  * controllers.
 564                  */
 565                 return 2;
 566 }
 567
 568 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
 569 {
 570         struct sys_dev *list, *elem;
 571         char *disk_path;
 572
 573         if ((list = find_intel_devices()) == NULL)
 574                 return 0;
 575
 576         if (fd < 0)
 577                 disk_path  = (char *) devname;
 578         else
 579                 disk_path = diskfd_to_devpath(fd);
 580
 581         if (!disk_path)
 582                 return 0;
 583
 584         for (elem = list; elem; elem = elem->next)
 585                 if (path_attached_to_hba(disk_path, elem->path))
 586                         return elem;
 587
 588         if (disk_path != devname)
 589                 free(disk_path);
 590
 591         return NULL;
 592 }
 593
 594
 595 static int find_intel_hba_capability(int fd, struct intel_super *super,
 596                                      char *devname);
 597
 598 static struct supertype *match_metadata_desc_imsm(char *arg)
 599 {
 600         struct supertype *st;
 601
 602         if (strcmp(arg, "imsm") != 0 &&
 603             strcmp(arg, "default") != 0
 604                 )
 605                 return NULL;
 606
 607         st = xcalloc(1, sizeof(*st));
 608         st->ss = &super_imsm;
 609         st->max_devs = IMSM_MAX_DEVICES;
 610         st->minor_version = 0;
 611         st->sb = NULL;
 612         return st;
 613 }
 614
 615 #ifndef MDASSEMBLE
 616 static __u8 *get_imsm_version(struct imsm_super *mpb)
 617 {
 618         return &mpb->sig[MPB_SIG_LEN];
 619 }
 620 #endif
 621
 622 /* retrieve a disk directly from the anchor when the anchor is known to be
 623  * up-to-date, currently only at load time
 624  */
 625 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
 626 {
 627         if (index >= mpb->num_disks)
 628                 return NULL;
 629         return &mpb->disk[index];
 630 }
 631
 632 /* retrieve the disk description based on a index of the disk
 633  * in the sub-array
 634  */
 635 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
 636 {
 637         struct dl *d;
 638
 639         for (d = super->disks; d; d = d->next)
 640                 if (d->index == index)
 641                         return d;
 642
 643         return NULL;
 644 }
 645 /* retrieve a disk from the parsed metadata */
 646 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
 647 {
 648         struct dl *dl;
 649
 650         dl = get_imsm_dl_disk(super, index);
 651         if (dl)
 652                 return &dl->disk;
 653
 654         return NULL;
 655 }
 656
 657 /* generate a checksum directly from the anchor when the anchor is known to be
 658  * up-to-date, currently only at load or write_super after coalescing
 659  */
 660 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
 661 {
 662         __u32 end = mpb->mpb_size / sizeof(end);
 663         __u32 *p = (__u32 *) mpb;
 664         __u32 sum = 0;
 665
 666         while (end--) {
 667                 sum += __le32_to_cpu(*p);
 668                 p++;
 669         }
 670
 671         return sum - __le32_to_cpu(mpb->check_sum);
 672 }
 673
 674 static size_t sizeof_imsm_map(struct imsm_map *map)
 675 {
 676         return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
 677 }
 678
 679 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
 680 {
 681         /* A device can have 2 maps if it is in the middle of a migration.
 682          * If second_map is:
 683          *    MAP_0 - we return the first map
 684          *    MAP_1 - we return the second map if it exists, else NULL
 685          *    MAP_X - we return the second map if it exists, else the first
 686          */
 687         struct imsm_map *map = &dev->vol.map[0];
 688         struct imsm_map *map2 = NULL;
 689
 690         if (dev->vol.migr_state)
 691                 map2 = (void *)map + sizeof_imsm_map(map);
 692
 693         switch (second_map) {
 694         case MAP_0:
 695                 break;
 696         case MAP_1:
 697                 map = map2;
 698                 break;
 699         case MAP_X:
 700                 if (map2)
 701                         map = map2;
 702                 break;
 703         default:
 704                 map = NULL;
 705         }
 706         return map;
 707
 708 }
 709
 710 /* return the size of the device.
 711  * migr_state increases the returned size if map[0] were to be duplicated
 712  */
 713 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
 714 {
 715         size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
 716                       sizeof_imsm_map(get_imsm_map(dev, MAP_0));
 717
 718         /* migrating means an additional map */
 719         if (dev->vol.migr_state)
 720                 size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
 721         else if (migr_state)
 722                 size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));
 723
 724         return size;
 725 }
 726
 727 #ifndef MDASSEMBLE
 728 /* retrieve disk serial number list from a metadata update */
 729 static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
 730 {
 731         void *u = update;
 732         struct disk_info *inf;
 733
 734         inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
 735               sizeof_imsm_dev(&update->dev, 0);
 736
 737         return inf;
 738 }
 739 #endif
 740
 741 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
 742 {
 743         int offset;
 744         int i;
 745         void *_mpb = mpb;
 746
 747         if (index >= mpb->num_raid_devs)
 748                 return NULL;
 749
 750         /* devices start after all disks */
 751         offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
 752
 753         for (i = 0; i <= index; i++)
 754                 if (i == index)
 755                         return _mpb + offset;
 756                 else
 757                         offset += sizeof_imsm_dev(_mpb + offset, 0);
 758
 759         return NULL;
 760 }
 761
 762 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
 763 {
 764         struct intel_dev *dv;
 765
 766         if (index >= super->anchor->num_raid_devs)
 767                 return NULL;
 768         for (dv = super->devlist; dv; dv = dv->next)
 769                 if (dv->index == index)
 770                         return dv->dev;
 771         return NULL;
 772 }
 773
 774 /*
 775  * for second_map:
 776  *  == MAP_0 get first map
 777  *  == MAP_1 get second map
 778  *  == MAP_X than get map according to the current migr_state
 779  */
 780 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
 781                                   int slot,
 782                                   int second_map)
 783 {
 784         struct imsm_map *map;
 785
 786         map = get_imsm_map(dev, second_map);
 787
 788         /* top byte identifies disk under rebuild */
 789         return __le32_to_cpu(map->disk_ord_tbl[slot]);
 790 }
 791
 792 #define ord_to_idx(ord) (((ord) << 8) >> 8)
 793 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
 794 {
 795         __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
 796
 797         return ord_to_idx(ord);
 798 }
 799
 800 static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
 801 {
 802         map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
 803 }
 804
 805 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
 806 {
 807         int slot;
 808         __u32 ord;
 809
 810         for (slot = 0; slot < map->num_members; slot++) {
 811                 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
 812                 if (ord_to_idx(ord) == idx)
 813                         return slot;
 814         }
 815
 816         return -1;
 817 }
 818
 819 static int get_imsm_raid_level(struct imsm_map *map)
 820 {
 821         if (map->raid_level == 1) {
 822                 if (map->num_members == 2)
 823                         return 1;
 824                 else
 825                         return 10;
 826         }
 827
 828         return map->raid_level;
 829 }
 830
 831 static int cmp_extent(const void *av, const void *bv)
 832 {
 833         const struct extent *a = av;
 834         const struct extent *b = bv;
 835         if (a->start < b->start)
 836                 return -1;
 837         if (a->start > b->start)
 838                 return 1;
 839         return 0;
 840 }
 841
 842 static int count_memberships(struct dl *dl, struct intel_super *super)
 843 {
 844         int memberships = 0;
 845         int i;
 846
 847         for (i = 0; i < super->anchor->num_raid_devs; i++) {
 848                 struct imsm_dev *dev = get_imsm_dev(super, i);
 849                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
 850
 851                 if (get_imsm_disk_slot(map, dl->index) >= 0)
 852                         memberships++;
 853         }
 854
 855         return memberships;
 856 }
 857
 858 static __u32 imsm_min_reserved_sectors(struct intel_super *super);
 859
 860 static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
 861 {
 862         if (lo == 0 || hi == 0)
 863                 return 1;
 864         *lo = __le32_to_cpu((unsigned)n);
 865         *hi = __le32_to_cpu((unsigned)(n >> 32));
 866         return 0;
 867 }
 868
 869 static unsigned long long join_u32(__u32 lo, __u32 hi)
 870 {
 871         return (unsigned long long)__le32_to_cpu(lo) |
 872                (((unsigned long long)__le32_to_cpu(hi)) << 32);
 873 }
 874
 875 static unsigned long long total_blocks(struct imsm_disk *disk)
 876 {
 877         if (disk == NULL)
 878                 return 0;
 879         return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
 880 }
 881
 882 static unsigned long long pba_of_lba0(struct imsm_map *map)
 883 {
 884         if (map == NULL)
 885                 return 0;
 886         return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
 887 }
 888
 889 static unsigned long long blocks_per_member(struct imsm_map *map)
 890 {
 891         if (map == NULL)
 892                 return 0;
 893         return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
 894 }
 895
 896 #ifndef MDASSEMBLE
 897 static unsigned long long num_data_stripes(struct imsm_map *map)
 898 {
 899         if (map == NULL)
 900                 return 0;
 901         return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
 902 }
 903
 904 static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
 905 {
 906         split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
 907 }
 908 #endif
 909
 910 static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
 911 {
 912         split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
 913 }
 914
 915 static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
 916 {
 917         split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
 918 }
 919
 920 static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
 921 {
 922         split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
 923 }
 924
 925 static struct extent *get_extents(struct intel_super *super, struct dl *dl)
 926 {
 927         /* find a list of used extents on the given physical device */
 928         struct extent *rv, *e;
 929         int i;
 930         int memberships = count_memberships(dl, super);
 931         __u32 reservation;
 932
 933         /* trim the reserved area for spares, so they can join any array
 934          * regardless of whether the OROM has assigned sectors from the
 935          * IMSM_RESERVED_SECTORS region
 936          */
 937         if (dl->index == -1)
 938                 reservation = imsm_min_reserved_sectors(super);
 939         else
 940                 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
 941
 942         rv = xcalloc(sizeof(struct extent), (memberships + 1));
 943         e = rv;
 944
 945         for (i = 0; i < super->anchor->num_raid_devs; i++) {
 946                 struct imsm_dev *dev = get_imsm_dev(super, i);
 947                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
 948
 949                 if (get_imsm_disk_slot(map, dl->index) >= 0) {
 950                         e->start = pba_of_lba0(map);
 951                         e->size = blocks_per_member(map);
 952                         e++;
 953                 }
 954         }
 955         qsort(rv, memberships, sizeof(*rv), cmp_extent);
 956
 957         /* determine the start of the metadata
 958          * when no raid devices are defined use the default
 959          * ...otherwise allow the metadata to truncate the value
 960          * as is the case with older versions of imsm
 961          */
 962         if (memberships) {
 963                 struct extent *last = &rv[memberships - 1];
 964                 unsigned long long remainder;
 965
 966                 remainder = total_blocks(&dl->disk) - (last->start + last->size);
 967                 /* round down to 1k block to satisfy precision of the kernel
 968                  * 'size' interface
 969                  */
 970                 remainder &= ~1UL;
 971                 /* make sure remainder is still sane */
 972                 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
 973                         remainder = ROUND_UP(super->len, 512) >> 9;
 974                 if (reservation > remainder)
 975                         reservation = remainder;
 976         }
 977         e->start = total_blocks(&dl->disk) - reservation;
 978         e->size = 0;
 979         return rv;
 980 }
 981
 982 /* try to determine how much space is reserved for metadata from
 983  * the last get_extents() entry, otherwise fallback to the
 984  * default
 985  */
 986 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
 987 {
 988         struct extent *e;
 989         int i;
 990         __u32 rv;
 991
 992         /* for spares just return a minimal reservation which will grow
 993          * once the spare is picked up by an array
 994          */
 995         if (dl->index == -1)
 996                 return MPB_SECTOR_CNT;
 997
 998         e = get_extents(super, dl);
 999         if (!e)
1000                 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1001
1002         /* scroll to last entry */
1003         for (i = 0; e[i].size; i++)
1004                 continue;
1005
1006         rv = total_blocks(&dl->disk) - e[i].start;
1007
1008         free(e);
1009
1010         return rv;
1011 }
1012
1013 static int is_spare(struct imsm_disk *disk)
1014 {
1015         return (disk->status & SPARE_DISK) == SPARE_DISK;
1016 }
1017
1018 static int is_configured(struct imsm_disk *disk)
1019 {
1020         return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
1021 }
1022
1023 static int is_failed(struct imsm_disk *disk)
1024 {
1025         return (disk->status & FAILED_DISK) == FAILED_DISK;
1026 }
1027
1028 /* try to determine how much space is reserved for metadata from
1029  * the last get_extents() entry on the smallest active disk,
1030  * otherwise fallback to the default
1031  */
1032 static __u32 imsm_min_reserved_sectors(struct intel_super *super)
1033 {
1034         struct extent *e;
1035         int i;
1036         unsigned long long min_active;
1037         __u32 remainder;
1038         __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1039         struct dl *dl, *dl_min = NULL;
1040
1041         if (!super)
1042                 return rv;
1043
1044         min_active = 0;
1045         for (dl = super->disks; dl; dl = dl->next) {
1046                 if (dl->index < 0)
1047                         continue;
1048                 unsigned long long blocks = total_blocks(&dl->disk);
1049                 if (blocks < min_active || min_active == 0) {
1050                         dl_min = dl;
1051                         min_active = blocks;
1052                 }
1053         }
1054         if (!dl_min)
1055                 return rv;
1056
1057         /* find last lba used by subarrays on the smallest active disk */
1058         e = get_extents(super, dl_min);
1059         if (!e)
1060                 return rv;
1061         for (i = 0; e[i].size; i++)
1062                 continue;
1063
1064         remainder = min_active - e[i].start;
1065         free(e);
1066
1067         /* to give priority to recovery we should not require full
1068            IMSM_RESERVED_SECTORS from the spare */
1069         rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
1070
1071         /* if real reservation is smaller use that value */
1072         return  (remainder < rv) ? remainder : rv;
1073 }
1074
1075 /* Return minimum size of a spare that can be used in this array*/
1076 static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
1077 {
1078         struct intel_super *super = st->sb;
1079         struct dl *dl;
1080         struct extent *e;
1081         int i;
1082         unsigned long long rv = 0;
1083
1084         if (!super)
1085                 return rv;
1086         /* find first active disk in array */
1087         dl = super->disks;
1088         while (dl && (is_failed(&dl->disk) || dl->index == -1))
1089                 dl = dl->next;
1090         if (!dl)
1091                 return rv;
1092         /* find last lba used by subarrays */
1093         e = get_extents(super, dl);
1094         if (!e)
1095                 return rv;
1096         for (i = 0; e[i].size; i++)
1097                 continue;
1098         if (i > 0)
1099                 rv = e[i-1].start + e[i-1].size;
1100         free(e);
1101
1102         /* add the amount of space needed for metadata */
1103         rv = rv + imsm_min_reserved_sectors(super);
1104
1105         return rv * 512;
1106 }
1107
1108 static int is_gen_migration(struct imsm_dev *dev);
1109
1110 #ifndef MDASSEMBLE
1111 static __u64 blocks_per_migr_unit(struct intel_super *super,
1112                                   struct imsm_dev *dev);
1113
1114 static void print_imsm_dev(struct intel_super *super,
1115                            struct imsm_dev *dev,
1116                            char *uuid,
1117                            int disk_idx)
1118 {
1119         __u64 sz;
1120         int slot, i;
1121         struct imsm_map *map = get_imsm_map(dev, MAP_0);
1122         struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
1123         __u32 ord;
1124
1125         printf("\n");
1126         printf("[%.16s]:\n", dev->volume);
1127         printf("           UUID : %s\n", uuid);
1128         printf("     RAID Level : %d", get_imsm_raid_level(map));
1129         if (map2)
1130                 printf(" <-- %d", get_imsm_raid_level(map2));
1131         printf("\n");
1132         printf("        Members : %d", map->num_members);
1133         if (map2)
1134                 printf(" <-- %d", map2->num_members);
1135         printf("\n");
1136         printf("          Slots : [");
1137         for (i = 0; i < map->num_members; i++) {
1138                 ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
1139                 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1140         }
1141         printf("]");
1142         if (map2) {
1143                 printf(" <-- [");
1144                 for (i = 0; i < map2->num_members; i++) {
1145                         ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
1146                         printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1147                 }
1148                 printf("]");
1149         }
1150         printf("\n");
1151         printf("    Failed disk : ");
1152         if (map->failed_disk_num == 0xff)
1153                 printf("none");
1154         else
1155                 printf("%i", map->failed_disk_num);
1156         printf("\n");
1157         slot = get_imsm_disk_slot(map, disk_idx);
1158         if (slot >= 0) {
1159                 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
1160                 printf("      This Slot : %d%s\n", slot,
1161                        ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
1162         } else
1163                 printf("      This Slot : ?\n");
1164         sz = __le32_to_cpu(dev->size_high);
1165         sz <<= 32;
1166         sz += __le32_to_cpu(dev->size_low);
1167         printf("     Array Size : %llu%s\n", (unsigned long long)sz,
1168                human_size(sz * 512));
1169         sz = blocks_per_member(map);
1170         printf("   Per Dev Size : %llu%s\n", (unsigned long long)sz,
1171                human_size(sz * 512));
1172         printf("  Sector Offset : %llu\n",
1173                 pba_of_lba0(map));
1174         printf("    Num Stripes : %llu\n",
1175                 num_data_stripes(map));
1176         printf("     Chunk Size : %u KiB",
1177                 __le16_to_cpu(map->blocks_per_strip) / 2);
1178         if (map2)
1179                 printf(" <-- %u KiB",
1180                         __le16_to_cpu(map2->blocks_per_strip) / 2);
1181         printf("\n");
1182         printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
1183         printf("  Migrate State : ");
1184         if (dev->vol.migr_state) {
1185                 if (migr_type(dev) == MIGR_INIT)
1186                         printf("initialize\n");
1187                 else if (migr_type(dev) == MIGR_REBUILD)
1188                         printf("rebuild\n");
1189                 else if (migr_type(dev) == MIGR_VERIFY)
1190                         printf("check\n");
1191                 else if (migr_type(dev) == MIGR_GEN_MIGR)
1192                         printf("general migration\n");
1193                 else if (migr_type(dev) == MIGR_STATE_CHANGE)
1194                         printf("state change\n");
1195                 else if (migr_type(dev) == MIGR_REPAIR)
1196                         printf("repair\n");
1197                 else
1198                         printf("<unknown:%d>\n", migr_type(dev));
1199         } else
1200                 printf("idle\n");
1201         printf("      Map State : %s", map_state_str[map->map_state]);
1202         if (dev->vol.migr_state) {
1203                 struct imsm_map *map = get_imsm_map(dev, MAP_1);
1204
1205                 printf(" <-- %s", map_state_str[map->map_state]);
1206                 printf("\n     Checkpoint : %u ",
1207                            __le32_to_cpu(dev->vol.curr_migr_unit));
1208                 if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0)))
1209                         printf("(N/A)");
1210                 else
1211                         printf("(%llu)", (unsigned long long)
1212                                    blocks_per_migr_unit(super, dev));
1213         }
1214         printf("\n");
1215         printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
1216 }
1217
1218 static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
1219 {
1220         char str[MAX_RAID_SERIAL_LEN + 1];
1221         __u64 sz;
1222
1223         if (index < -1 || !disk)
1224                 return;
1225
1226         printf("\n");
1227         snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1228         if (index >= 0)
1229                 printf("  Disk%02d Serial : %s\n", index, str);
1230         else
1231                 printf("    Disk Serial : %s\n", str);
1232         printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1233                                             is_configured(disk) ? " active" : "",
1234                                             is_failed(disk) ? " failed" : "");
1235         printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1236         sz = total_blocks(disk) - reserved;
1237         printf("    Usable Size : %llu%s\n", (unsigned long long)sz,
1238                human_size(sz * 512));
1239 }
1240
1241 void examine_migr_rec_imsm(struct intel_super *super)
1242 {
1243         struct migr_record *migr_rec = super->migr_rec;
1244         struct imsm_super *mpb = super->anchor;
1245         int i;
1246
1247         for (i = 0; i < mpb->num_raid_devs; i++) {
1248                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1249                 struct imsm_map *map;
1250                 int slot = -1;
1251
1252                 if (is_gen_migration(dev) == 0)
1253                                 continue;
1254
1255                 printf("\nMigration Record Information:");
1256
1257                 /* first map under migration */
1258                 map = get_imsm_map(dev, MAP_0);
1259                 if (map)
1260                         slot = get_imsm_disk_slot(map, super->disks->index);
1261                 if ((map == NULL) || (slot > 1) || (slot < 0)) {
1262                         printf(" Empty\n                              ");
1263                         printf("Examine one of first two disks in array\n");
1264                         break;
1265                 }
1266                 printf("\n                     Status : ");
1267                 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
1268                         printf("Normal\n");
1269                 else
1270                         printf("Contains Data\n");
1271                 printf("               Current Unit : %u\n",
1272                        __le32_to_cpu(migr_rec->curr_migr_unit));
1273                 printf("                     Family : %u\n",
1274                        __le32_to_cpu(migr_rec->family_num));
1275                 printf("                  Ascending : %u\n",
1276                        __le32_to_cpu(migr_rec->ascending_migr));
1277                 printf("            Blocks Per Unit : %u\n",
1278                        __le32_to_cpu(migr_rec->blocks_per_unit));
1279                 printf("       Dest. Depth Per Unit : %u\n",
1280                        __le32_to_cpu(migr_rec->dest_depth_per_unit));
1281                 printf("        Checkpoint Area pba : %u\n",
1282                        __le32_to_cpu(migr_rec->ckpt_area_pba));
1283                 printf("           First member lba : %u\n",
1284                        __le32_to_cpu(migr_rec->dest_1st_member_lba));
1285                 printf("      Total Number of Units : %u\n",
1286                        __le32_to_cpu(migr_rec->num_migr_units));
1287                 printf("             Size of volume : %u\n",
1288                        __le32_to_cpu(migr_rec->post_migr_vol_cap));
1289                 printf("  Expansion space for LBA64 : %u\n",
1290                        __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
1291                 printf("       Record was read from : %u\n",
1292                        __le32_to_cpu(migr_rec->ckpt_read_disk_num));
1293
1294                 break;
1295         }
1296 }
1297 #endif /* MDASSEMBLE */
1298 /*******************************************************************************
1299  * function: imsm_check_attributes
1300  * Description: Function checks if features represented by attributes flags
1301  *              are supported by mdadm.
1302  * Parameters:
1303  *              attributes - Attributes read from metadata
1304  * Returns:
1305  *              0 - passed attributes contains unsupported features flags
1306  *              1 - all features are supported
1307  ******************************************************************************/
1308 static int imsm_check_attributes(__u32 attributes)
1309 {
1310         int ret_val = 1;
1311         __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
1312
1313         not_supported &= ~MPB_ATTRIB_IGNORED;
1314
1315         not_supported &= attributes;
1316         if (not_supported) {
1317                 pr_err("(IMSM): Unsupported attributes : %x\n",
1318                         (unsigned)__le32_to_cpu(not_supported));
1319                 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1320                         dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
1321                         not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1322                 }
1323                 if (not_supported & MPB_ATTRIB_2TB) {
1324                         dprintf("\t\tMPB_ATTRIB_2TB\n");
1325                         not_supported ^= MPB_ATTRIB_2TB;
1326                 }
1327                 if (not_supported & MPB_ATTRIB_RAID0) {
1328                         dprintf("\t\tMPB_ATTRIB_RAID0\n");
1329                         not_supported ^= MPB_ATTRIB_RAID0;
1330                 }
1331                 if (not_supported & MPB_ATTRIB_RAID1) {
1332                         dprintf("\t\tMPB_ATTRIB_RAID1\n");
1333                         not_supported ^= MPB_ATTRIB_RAID1;
1334                 }
1335                 if (not_supported & MPB_ATTRIB_RAID10) {
1336                         dprintf("\t\tMPB_ATTRIB_RAID10\n");
1337                         not_supported ^= MPB_ATTRIB_RAID10;
1338                 }
1339                 if (not_supported & MPB_ATTRIB_RAID1E) {
1340                         dprintf("\t\tMPB_ATTRIB_RAID1E\n");
1341                         not_supported ^= MPB_ATTRIB_RAID1E;
1342                 }
1343                 if (not_supported & MPB_ATTRIB_RAID5) {
1344                 dprintf("\t\tMPB_ATTRIB_RAID5\n");
1345                         not_supported ^= MPB_ATTRIB_RAID5;
1346                 }
1347                 if (not_supported & MPB_ATTRIB_RAIDCNG) {
1348                         dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
1349                         not_supported ^= MPB_ATTRIB_RAIDCNG;
1350                 }
1351                 if (not_supported & MPB_ATTRIB_BBM) {
1352                         dprintf("\t\tMPB_ATTRIB_BBM\n");
1353                 not_supported ^= MPB_ATTRIB_BBM;
1354                 }
1355                 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1356                         dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
1357                         not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1358                 }
1359                 if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
1360                         dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
1361                         not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
1362                 }
1363                 if (not_supported & MPB_ATTRIB_2TB_DISK) {
1364                         dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
1365                         not_supported ^= MPB_ATTRIB_2TB_DISK;
1366                 }
1367                 if (not_supported & MPB_ATTRIB_NEVER_USE2) {
1368                         dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
1369                         not_supported ^= MPB_ATTRIB_NEVER_USE2;
1370                 }
1371                 if (not_supported & MPB_ATTRIB_NEVER_USE) {
1372                         dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
1373                         not_supported ^= MPB_ATTRIB_NEVER_USE;
1374                 }
1375
1376                 if (not_supported)
1377                         dprintf(Name "(IMSM): Unknown attributes : %x\n", not_supported);
1378
1379                 ret_val = 0;
1380         }
1381
1382         return ret_val;
1383 }
1384
1385 #ifndef MDASSEMBLE
1386 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1387
1388 static void examine_super_imsm(struct supertype *st, char *homehost)
1389 {
1390         struct intel_super *super = st->sb;
1391         struct imsm_super *mpb = super->anchor;
1392         char str[MAX_SIGNATURE_LENGTH];
1393         int i;
1394         struct mdinfo info;
1395         char nbuf[64];
1396         __u32 sum;
1397         __u32 reserved = imsm_reserved_sectors(super, super->disks);
1398         struct dl *dl;
1399
1400         snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
1401         printf("          Magic : %s\n", str);
1402         snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1403         printf("        Version : %s\n", get_imsm_version(mpb));
1404         printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
1405         printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
1406         printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
1407         printf("     Attributes : ");
1408         if (imsm_check_attributes(mpb->attributes))
1409                 printf("All supported\n");
1410         else
1411                 printf("not supported\n");
1412         getinfo_super_imsm(st, &info, NULL);
1413         fname_from_uuid(st, &info, nbuf, ':');
1414         printf("           UUID : %s\n", nbuf + 5);
1415         sum = __le32_to_cpu(mpb->check_sum);
1416         printf("       Checksum : %08x %s\n", sum,
1417                 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
1418         printf("    MPB Sectors : %d\n", mpb_sectors(mpb));
1419         printf("          Disks : %d\n", mpb->num_disks);
1420         printf("   RAID Devices : %d\n", mpb->num_raid_devs);
1421         print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved);
1422         if (super->bbm_log) {
1423                 struct bbm_log *log = super->bbm_log;
1424
1425                 printf("\n");
1426                 printf("Bad Block Management Log:\n");
1427                 printf("       Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1428                 printf("      Signature : %x\n", __le32_to_cpu(log->signature));
1429                 printf("    Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1430                 printf("   Spare Blocks : %d\n",  __le32_to_cpu(log->reserved_spare_block_count));
1431                 printf("    First Spare : %llx\n",
1432                        (unsigned long long) __le64_to_cpu(log->first_spare_lba));
1433         }
1434         for (i = 0; i < mpb->num_raid_devs; i++) {
1435                 struct mdinfo info;
1436                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1437
1438                 super->current_vol = i;
1439                 getinfo_super_imsm(st, &info, NULL);
1440                 fname_from_uuid(st, &info, nbuf, ':');
1441                 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
1442         }
1443         for (i = 0; i < mpb->num_disks; i++) {
1444                 if (i == super->disks->index)
1445                         continue;
1446                 print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved);
1447         }
1448
1449         for (dl = super->disks; dl; dl = dl->next)
1450                 if (dl->index == -1)
1451                         print_imsm_disk(&dl->disk, -1, reserved);
1452
1453         examine_migr_rec_imsm(super);
1454 }
1455
1456 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1457 {
1458         /* We just write a generic IMSM ARRAY entry */
1459         struct mdinfo info;
1460         char nbuf[64];
1461         struct intel_super *super = st->sb;
1462
1463         if (!super->anchor->num_raid_devs) {
1464                 printf("ARRAY metadata=imsm\n");
1465                 return;
1466         }
1467
1468         getinfo_super_imsm(st, &info, NULL);
1469         fname_from_uuid(st, &info, nbuf, ':');
1470         printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1471 }
1472
1473 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1474 {
1475         /* We just write a generic IMSM ARRAY entry */
1476         struct mdinfo info;
1477         char nbuf[64];
1478         char nbuf1[64];
1479         struct intel_super *super = st->sb;
1480         int i;
1481
1482         if (!super->anchor->num_raid_devs)
1483                 return;
1484
1485         getinfo_super_imsm(st, &info, NULL);
1486         fname_from_uuid(st, &info, nbuf, ':');
1487         for (i = 0; i < super->anchor->num_raid_devs; i++) {
1488                 struct imsm_dev *dev = get_imsm_dev(super, i);
1489
1490                 super->current_vol = i;
1491                 getinfo_super_imsm(st, &info, NULL);
1492                 fname_from_uuid(st, &info, nbuf1, ':');
1493                 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1494                        dev->volume, nbuf + 5, i, nbuf1 + 5);
1495         }
1496 }
1497
1498 static void export_examine_super_imsm(struct supertype *st)
1499 {
1500         struct intel_super *super = st->sb;
1501         struct imsm_super *mpb = super->anchor;
1502         struct mdinfo info;
1503         char nbuf[64];
1504
1505         getinfo_super_imsm(st, &info, NULL);
1506         fname_from_uuid(st, &info, nbuf, ':');
1507         printf("MD_METADATA=imsm\n");
1508         printf("MD_LEVEL=container\n");
1509         printf("MD_UUID=%s\n", nbuf+5);
1510         printf("MD_DEVICES=%u\n", mpb->num_disks);
1511 }
1512
1513 static int copy_metadata_imsm(struct supertype *st, int from, int to)
1514 {
1515         /* The second last 512byte sector of the device contains
1516          * the "struct imsm_super" metadata.
1517          * This contains mpb_size which is the size in bytes of the
1518          * extended metadata.  This is located immediately before
1519          * the imsm_super.
1520          * We want to read all that, plus the last sector which
1521          * may contain a migration record, and write it all
1522          * to the target.
1523          */
1524         void *buf;
1525         unsigned long long dsize, offset;
1526         int sectors;
1527         struct imsm_super *sb;
1528         int written = 0;
1529
1530         if (posix_memalign(&buf, 4096, 4096) != 0)
1531                 return 1;
1532
1533         if (!get_dev_size(from, NULL, &dsize))
1534                 goto err;
1535
1536         if (lseek64(from, dsize-1024, 0) < 0)
1537                 goto err;
1538         if (read(from, buf, 512) != 512)
1539                 goto err;
1540         sb = buf;
1541         if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
1542                 goto err;
1543
1544         sectors = mpb_sectors(sb) + 2;
1545         offset = dsize - sectors * 512;
1546         if (lseek64(from, offset, 0) < 0 ||
1547             lseek64(to, offset, 0) < 0)
1548                 goto err;
1549         while (written < sectors * 512) {
1550                 int n = sectors*512 - written;
1551                 if (n > 4096)
1552                         n = 4096;
1553                 if (read(from, buf, n) != n)
1554                         goto err;
1555                 if (write(to, buf, n) != n)
1556                         goto err;
1557                 written += n;
1558         }
1559         free(buf);
1560         return 0;
1561 err:
1562         free(buf);
1563         return 1;
1564 }
1565
1566 static void detail_super_imsm(struct supertype *st, char *homehost)
1567 {
1568         struct mdinfo info;
1569         char nbuf[64];
1570
1571         getinfo_super_imsm(st, &info, NULL);
1572         fname_from_uuid(st, &info, nbuf, ':');
1573         printf("\n           UUID : %s\n", nbuf + 5);
1574 }
1575
1576 static void brief_detail_super_imsm(struct supertype *st)
1577 {
1578         struct mdinfo info;
1579         char nbuf[64];
1580         getinfo_super_imsm(st, &info, NULL);
1581         fname_from_uuid(st, &info, nbuf, ':');
1582         printf(" UUID=%s", nbuf + 5);
1583 }
1584
1585 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1586 static void fd2devname(int fd, char *name);
1587
1588 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
1589 {
1590         /* dump an unsorted list of devices attached to AHCI Intel storage
1591          * controller, as well as non-connected ports
1592          */
1593         int hba_len = strlen(hba_path) + 1;
1594         struct dirent *ent;
1595         DIR *dir;
1596         char *path = NULL;
1597         int err = 0;
1598         unsigned long port_mask = (1 << port_count) - 1;
1599
1600         if (port_count > (int)sizeof(port_mask) * 8) {
1601                 if (verbose > 0)
1602                         pr_err("port_count %d out of range\n", port_count);
1603                 return 2;
1604         }
1605
1606         /* scroll through /sys/dev/block looking for devices attached to
1607          * this hba
1608          */
1609         dir = opendir("/sys/dev/block");
1610         for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1611                 int fd;
1612                 char model[64];
1613                 char vendor[64];
1614                 char buf[1024];
1615                 int major, minor;
1616                 char *device;
1617                 char *c;
1618                 int port;
1619                 int type;
1620
1621                 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1622                         continue;
1623                 path = devt_to_devpath(makedev(major, minor));
1624                 if (!path)
1625                         continue;
1626                 if (!path_attached_to_hba(path, hba_path)) {
1627                         free(path);
1628                         path = NULL;
1629                         continue;
1630                 }
1631
1632                 /* retrieve the scsi device type */
1633                 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1634                         if (verbose > 0)
1635                                 pr_err("failed to allocate 'device'\n");
1636                         err = 2;
1637                         break;
1638                 }
1639                 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1640                 if (load_sys(device, buf) != 0) {
1641                         if (verbose > 0)
1642                                 pr_err("failed to read device type for %s\n",
1643                                         path);
1644                         err = 2;
1645                         free(device);
1646                         break;
1647                 }
1648                 type = strtoul(buf, NULL, 10);
1649
1650                 /* if it's not a disk print the vendor and model */
1651                 if (!(type == 0 || type == 7 || type == 14)) {
1652                         vendor[0] = '\0';
1653                         model[0] = '\0';
1654                         sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1655                         if (load_sys(device, buf) == 0) {
1656                                 strncpy(vendor, buf, sizeof(vendor));
1657                                 vendor[sizeof(vendor) - 1] = '\0';
1658                                 c = (char *) &vendor[sizeof(vendor) - 1];
1659                                 while (isspace(*c) || *c == '\0')
1660                                         *c-- = '\0';
1661
1662                         }
1663                         sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1664                         if (load_sys(device, buf) == 0) {
1665                                 strncpy(model, buf, sizeof(model));
1666                                 model[sizeof(model) - 1] = '\0';
1667                                 c = (char *) &model[sizeof(model) - 1];
1668                                 while (isspace(*c) || *c == '\0')
1669                                         *c-- = '\0';
1670                         }
1671
1672                         if (vendor[0] && model[0])
1673                                 sprintf(buf, "%.64s %.64s", vendor, model);
1674                         else
1675                                 switch (type) { /* numbers from hald/linux/device.c */
1676                                 case 1: sprintf(buf, "tape"); break;
1677                                 case 2: sprintf(buf, "printer"); break;
1678                                 case 3: sprintf(buf, "processor"); break;
1679                                 case 4:
1680                                 case 5: sprintf(buf, "cdrom"); break;
1681                                 case 6: sprintf(buf, "scanner"); break;
1682                                 case 8: sprintf(buf, "media_changer"); break;
1683                                 case 9: sprintf(buf, "comm"); break;
1684                                 case 12: sprintf(buf, "raid"); break;
1685                                 default: sprintf(buf, "unknown");
1686                                 }
1687                 } else
1688                         buf[0] = '\0';
1689                 free(device);
1690
1691                 /* chop device path to 'host%d' and calculate the port number */
1692                 c = strchr(&path[hba_len], '/');
1693                 if (!c) {
1694                         if (verbose > 0)
1695                                 pr_err("%s - invalid path name\n", path + hba_len);
1696                         err = 2;
1697                         break;
1698                 }
1699                 *c = '\0';
1700                 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1701                         port -= host_base;
1702                 else {
1703                         if (verbose > 0) {
1704                                 *c = '/'; /* repair the full string */
1705                                 pr_err("failed to determine port number for %s\n",
1706                                         path);
1707                         }
1708                         err = 2;
1709                         break;
1710                 }
1711
1712                 /* mark this port as used */
1713                 port_mask &= ~(1 << port);
1714
1715                 /* print out the device information */
1716                 if (buf[0]) {
1717                         printf("          Port%d : - non-disk device (%s) -\n", port, buf);
1718                         continue;
1719                 }
1720
1721                 fd = dev_open(ent->d_name, O_RDONLY);
1722                 if (fd < 0)
1723                         printf("          Port%d : - disk info unavailable -\n", port);
1724                 else {
1725                         fd2devname(fd, buf);
1726                         printf("          Port%d : %s", port, buf);
1727                         if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1728                                 printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
1729                         else
1730                                 printf(" ()\n");
1731                         close(fd);
1732                 }
1733                 free(path);
1734                 path = NULL;
1735         }
1736         if (path)
1737                 free(path);
1738         if (dir)
1739                 closedir(dir);
1740         if (err == 0) {
1741                 int i;
1742
1743                 for (i = 0; i < port_count; i++)
1744                         if (port_mask & (1 << i))
1745                                 printf("          Port%d : - no device attached -\n", i);
1746         }
1747
1748         return err;
1749 }
1750
1751 static void print_found_intel_controllers(struct sys_dev *elem)
1752 {
1753         for (; elem; elem = elem->next) {
1754                 pr_err("found Intel(R) ");
1755                 if (elem->type == SYS_DEV_SATA)
1756                         fprintf(stderr, "SATA ");
1757                 else if (elem->type == SYS_DEV_SAS)
1758                         fprintf(stderr, "SAS ");
1759                 fprintf(stderr, "RAID controller");
1760                 if (elem->pci_id)
1761                         fprintf(stderr, " at %s", elem->pci_id);
1762                 fprintf(stderr, ".\n");
1763         }
1764         fflush(stderr);
1765 }
1766
/* Count the 'host%d' entries of a controller directory.
 *
 * hba_path   : directory to scan
 * port_count : set to (highest host - lowest host + 1), or to 0 when
 *              the directory cannot be opened or holds no host entry
 *
 * Returns the lowest host number found, or -1 when the directory
 * cannot be opened or no host entry exists.
 *
 * Lowest and highest host numbers are tracked independently because
 * readdir() does not return entries in any particular order.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
        struct dirent *ent;
        DIR *dir;
        int host_min = -1;
        int host_max = -1;
        int found = 0;

        *port_count = 0;
        dir = opendir(hba_path);
        if (dir == NULL)
                return -1;

        while ((ent = readdir(dir)) != NULL) {
                int host;

                if (sscanf(ent->d_name, "host%d", &host) != 1)
                        continue;
                if (!found || host < host_min)
                        host_min = host;
                if (!found || host > host_max)
                        host_max = host;
                found = 1;
        }
        closedir(dir);

        if (found)
                *port_count = host_max - host_min + 1;
        return host_min;
}
1793
1794 static void print_imsm_capability(const struct imsm_orom *orom)
1795 {
1796         printf("       Platform : Intel(R) Matrix Storage Manager\n");
1797         printf("        Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1798                orom->hotfix_ver, orom->build);
1799         printf("    RAID Levels :%s%s%s%s%s\n",
1800                imsm_orom_has_raid0(orom) ? " raid0" : "",
1801                imsm_orom_has_raid1(orom) ? " raid1" : "",
1802                imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1803                imsm_orom_has_raid10(orom) ? " raid10" : "",
1804                imsm_orom_has_raid5(orom) ? " raid5" : "");
1805         printf("    Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1806                imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1807                imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1808                imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1809                imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1810                imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1811                imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1812                imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1813                imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1814                imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1815                imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1816                imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1817                imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1818                imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1819                imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1820                imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1821                imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1822         printf("    2TB volumes :%s supported\n",
1823                (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not");
1824         printf("      2TB disks :%s supported\n",
1825                (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not");
1826         printf("      Max Disks : %d\n", orom->tds);
1827         printf("    Max Volumes : %d per array, %d per controller\n",
1828                orom->vpa, orom->vphba);
1829         return;
1830 }
1831
1832 static void print_imsm_capability_export(const struct imsm_orom *orom)
1833 {
1834         printf("MD_FIRMWARE_TYPE=imsm\n");
1835         printf("IMSM_VERSION=%d.%d.%d.%d\n",orom->major_ver, orom->minor_ver,
1836                         orom->hotfix_ver, orom->build);
1837         printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
1838                         imsm_orom_has_raid0(orom) ? "raid0 " : "",
1839                         imsm_orom_has_raid1(orom) ? "raid1 " : "",
1840                         imsm_orom_has_raid1e(orom) ? "raid1e " : "",
1841                         imsm_orom_has_raid5(orom) ? "raid10 " : "",
1842                         imsm_orom_has_raid10(orom) ? "raid5 " : "");
1843         printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1844                         imsm_orom_has_chunk(orom, 2) ? "2k " : "",
1845                         imsm_orom_has_chunk(orom, 4) ? "4k " : "",
1846                         imsm_orom_has_chunk(orom, 8) ? "8k " : "",
1847                         imsm_orom_has_chunk(orom, 16) ? "16k " : "",
1848                         imsm_orom_has_chunk(orom, 32) ? "32k " : "",
1849                         imsm_orom_has_chunk(orom, 64) ? "64k " : "",
1850                         imsm_orom_has_chunk(orom, 128) ? "128k " : "",
1851                         imsm_orom_has_chunk(orom, 256) ? "256k " : "",
1852                         imsm_orom_has_chunk(orom, 512) ? "512k " : "",
1853                         imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
1854                         imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
1855                         imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
1856                         imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
1857                         imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
1858                         imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
1859                         imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
1860         printf("IMSM_2TB_VOLUMES=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
1861         printf("IMSM_2TB_DISKS=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
1862         printf("IMSM_MAX_DISKS=%d\n",orom->tds);
1863         printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n",orom->vpa);
1864         printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n",orom->vphba);
1865 }
1866
1867 static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
1868 {
1869         /* There are two components to imsm platform support, the ahci SATA
1870          * controller and the option-rom.  To find the SATA controller we
1871          * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
1872          * controller with the Intel vendor id is present.  This approach
1873          * allows mdadm to leverage the kernel's ahci detection logic, with the
1874          * caveat that if ahci.ko is not loaded mdadm will not be able to
1875          * detect platform raid capabilities.  The option-rom resides in a
1876          * platform "Adapter ROM".  We scan for its signature to retrieve the
1877          * platform capabilities.  If raid support is disabled in the BIOS the
1878          * option-rom capability structure will not be available.
1879          */
1880         const struct imsm_orom *orom;
1881         struct sys_dev *list, *hba;
1882         int host_base = 0;
1883         int port_count = 0;
1884         int result=1;
1885
1886         if (enumerate_only) {
1887                 if (check_env("IMSM_NO_PLATFORM"))
1888                         return 0;
1889                 list = find_intel_devices();
1890                 if (!list)
1891                         return 2;
1892                 for (hba = list; hba; hba = hba->next) {
1893                         orom = find_imsm_capability(hba->type);
1894                         if (!orom) {
1895                                 result = 2;
1896                                 break;
1897                         }
1898                         else
1899                                 result = 0;
1900                 }
1901                 return result;
1902         }
1903
1904         list = find_intel_devices();
1905         if (!list) {
1906                 if (verbose > 0)
1907                         pr_err("no active Intel(R) RAID "
1908                                 "controller found.\n");
1909                 return 2;
1910         } else if (verbose > 0)
1911                 print_found_intel_controllers(list);
1912
1913         for (hba = list; hba; hba = hba->next) {
1914                 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
1915                         continue;
1916                 orom = find_imsm_capability(hba->type);
1917                 if (!orom)
1918                         pr_err("imsm capabilities not found for controller: %s (type %s)\n",
1919                                 hba->path, get_sys_dev_type(hba->type));
1920                 else {
1921                         result = 0;
1922                         print_imsm_capability(orom);
1923                         printf(" I/O Controller : %s (%s)\n",
1924                                 hba->path, get_sys_dev_type(hba->type));
1925                         if (hba->type == SYS_DEV_SATA) {
1926                                 host_base = ahci_get_port_count(hba->path, &port_count);
1927                                 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
1928                                         if (verbose > 0)
1929                                                 pr_err("failed to enumerate "
1930                                                         "ports on SATA controller at %s.\n", hba->pci_id);
1931                                         result |= 2;
1932                                 }
1933                         }
1934                 }
1935         }
1936
1937         if (controller_path && result == 1)
1938                 pr_err("no active Intel(R) RAID "
1939                                 "controller found under %s\n",controller_path);
1940
1941         return result;
1942 }
1943
1944 static int export_detail_platform_imsm(int verbose, char *controller_path)
1945 {
1946         const struct imsm_orom *orom;
1947         struct sys_dev *list, *hba;
1948         int result=1;
1949
1950         list = find_intel_devices();
1951         if (!list) {
1952                 if (verbose > 0)
1953                         pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
1954                 result = 2;
1955                 return result;
1956         }
1957
1958         for (hba = list; hba; hba = hba->next) {
1959                 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
1960                         continue;
1961                 orom = find_imsm_capability(hba->type);
1962                 if (!orom) {
1963                         if (verbose > 0)
1964                                 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",hba->path);
1965                 }
1966                 else {
1967                         print_imsm_capability_export(orom);
1968                         result = 0;
1969                 }
1970         }
1971
1972         return result;
1973 }
1974
1975 #endif
1976
static int match_home_imsm(struct supertype *st, char *homehost)
{
        /* imsm metadata carries no host identification, so whether an
         * array is "meant" for this host can be neither confirmed nor
         * denied: always answer -1.  Member disks that do not belong
         * are excluded through compare_super and the 'family_num'
         * fields, and mdadm.conf names the arrays to assemble.
         * Auto-assembly may still pick up "foreign" arrays.
         */
        (void) st;
        (void) homehost;

        return -1;
}
1990
1991 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
1992 {
1993         /* The uuid returned here is used for:
1994          *  uuid to put into bitmap file (Create, Grow)
1995          *  uuid for backup header when saving critical section (Grow)
1996          *  comparing uuids when re-adding a device into an array
1997          *    In these cases the uuid required is that of the data-array,
1998          *    not the device-set.
1999          *  uuid to recognise same set when adding a missing device back
2000          *    to an array.   This is a uuid for the device-set.
2001          *
2002          * For each of these we can make do with a truncated
2003          * or hashed uuid rather than the original, as long as
2004          * everyone agrees.
2005          * In each case the uuid required is that of the data-array,
2006          * not the device-set.
2007          */
2008         /* imsm does not track uuid's so we synthesis one using sha1 on
2009          * - The signature (Which is constant for all imsm array, but no matter)
2010          * - the orig_family_num of the container
2011          * - the index number of the volume
2012          * - the 'serial' number of the volume.
2013          * Hopefully these are all constant.
2014          */
2015         struct intel_super *super = st->sb;
2016
2017         char buf[20];
2018         struct sha1_ctx ctx;
2019         struct imsm_dev *dev = NULL;
2020         __u32 family_num;
2021
2022         /* some mdadm versions failed to set ->orig_family_num, in which
2023          * case fall back to ->family_num.  orig_family_num will be
2024          * fixed up with the first metadata update.
2025          */
2026         family_num = super->anchor->orig_family_num;
2027         if (family_num == 0)
2028                 family_num = super->anchor->family_num;
2029         sha1_init_ctx(&ctx);
2030         sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
2031         sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
2032         if (super->current_vol >= 0)
2033                 dev = get_imsm_dev(super, super->current_vol);
2034         if (dev) {
2035                 __u32 vol = super->current_vol;
2036                 sha1_process_bytes(&vol, sizeof(vol), &ctx);
2037                 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
2038         }
2039         sha1_finish_ctx(&ctx, buf);
2040         memcpy(uuid, buf, 4*4);
2041 }
2042
2043 #if 0
/* Split the dotted version text returned by get_imsm_version() into
 * numbers (this code is compiled out by the surrounding '#if 0').
 *
 *      mpb : metadata whose version text is parsed
 *      m   : receives the second component (the 'minor' digits)
 *      p   : receives the third component (the 'patch' digits)
 *
 * The first component is collected but never returned.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
        __u8 *v = get_imsm_version(mpb);
        __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
        /* three bytes each: at most two digits plus the terminator */
        char major[] = { 0, 0, 0 };
        char minor[] = { 0 ,0, 0 };
        char patch[] = { 0, 0, 0 };
        char *ver_parse[] = { major, minor, patch };
        int i, j;

        i = j = 0;
        /* NOTE(review): *v is read before 'v < end' is tested, and 'i'
         * is never checked against the three entries of ver_parse -
         * confirm both before re-enabling this code
         */
        while (*v != '\0' && v < end) {
                if (*v != '.' && j < 2)
                        ver_parse[i][j++] = *v;
                else {
                        /* a '.' or a third digit moves on to the
                         * next component
                         */
                        i++;
                        j = 0;
                }
                v++;
        }

        /* base 0: digits with a leading '0' are parsed as octal */
        *m = strtol(minor, NULL, 0);
        *p = strtol(patch, NULL, 0);
}
2069 #endif
2070
2071 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
2072 {
2073         /* migr_strip_size when repairing or initializing parity */
2074         struct imsm_map *map = get_imsm_map(dev, MAP_0);
2075         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2076
2077         switch (get_imsm_raid_level(map)) {
2078         case 5:
2079         case 10:
2080                 return chunk;
2081         default:
2082                 return 128*1024 >> 9;
2083         }
2084 }
2085
2086 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
2087 {
2088         /* migr_strip_size when rebuilding a degraded disk, no idea why
2089          * this is different than migr_strip_size_resync(), but it's good
2090          * to be compatible
2091          */
2092         struct imsm_map *map = get_imsm_map(dev, MAP_1);
2093         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2094
2095         switch (get_imsm_raid_level(map)) {
2096         case 1:
2097         case 10:
2098                 if (map->num_members % map->num_domains == 0)
2099                         return 128*1024 >> 9;
2100                 else
2101                         return chunk;
2102         case 5:
2103                 return max((__u32) 64*1024 >> 9, chunk);
2104         default:
2105                 return 128*1024 >> 9;
2106         }
2107 }
2108
2109 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
2110 {
2111         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2112         struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2113         __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
2114         __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
2115
2116         return max((__u32) 1, hi_chunk / lo_chunk);
2117 }
2118
2119 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
2120 {
2121         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2122         int level = get_imsm_raid_level(lo);
2123
2124         if (level == 1 || level == 10) {
2125                 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2126
2127                 return hi->num_domains;
2128         } else
2129                 return num_stripes_per_unit_resync(dev);
2130 }
2131
2132 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
2133 {
2134         /* named 'imsm_' because raid0, raid1 and raid10
2135          * counter-intuitively have the same number of data disks
2136          */
2137         struct imsm_map *map = get_imsm_map(dev, second_map);
2138
2139         switch (get_imsm_raid_level(map)) {
2140         case 0:
2141                 return map->num_members;
2142                 break;
2143         case 1:
2144         case 10:
2145                 return map->num_members/2;
2146         case 5:
2147                 return map->num_members - 1;
2148         default:
2149                 dprintf("%s: unsupported raid level\n", __func__);
2150                 return 0;
2151         }
2152 }
2153
2154 static __u32 parity_segment_depth(struct imsm_dev *dev)
2155 {
2156         struct imsm_map *map = get_imsm_map(dev, MAP_0);
2157         __u32 chunk =  __le32_to_cpu(map->blocks_per_strip);
2158
2159         switch(get_imsm_raid_level(map)) {
2160         case 1:
2161         case 10:
2162                 return chunk * map->num_domains;
2163         case 5:
2164                 return chunk * map->num_members;
2165         default:
2166                 return chunk;
2167         }
2168 }
2169
2170 static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
2171 {
2172         struct imsm_map *map = get_imsm_map(dev, MAP_1);
2173         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2174         __u32 strip = block / chunk;
2175
2176         switch (get_imsm_raid_level(map)) {
2177         case 1:
2178         case 10: {
2179                 __u32 vol_strip = (strip * map->num_domains) + 1;
2180                 __u32 vol_stripe = vol_strip / map->num_members;
2181
2182                 return vol_stripe * chunk + block % chunk;
2183         } case 5: {
2184                 __u32 stripe = strip / (map->num_members - 1);
2185
2186                 return stripe * chunk + block % chunk;
2187         }
2188         default:
2189                 return 0;
2190         }
2191 }
2192
2193 static __u64 blocks_per_migr_unit(struct intel_super *super,
2194                                   struct imsm_dev *dev)
2195 {
2196         /* calculate the conversion factor between per member 'blocks'
2197          * (md/{resync,rebuild}_start) and imsm migration units, return
2198          * 0 for the 'not migrating' and 'unsupported migration' cases
2199          */
2200         if (!dev->vol.migr_state)
2201                 return 0;
2202
2203         switch (migr_type(dev)) {
2204         case MIGR_GEN_MIGR: {
2205                 struct migr_record *migr_rec = super->migr_rec;
2206                 return __le32_to_cpu(migr_rec->blocks_per_unit);
2207         }
2208         case MIGR_VERIFY:
2209         case MIGR_REPAIR:
2210         case MIGR_INIT: {
2211                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2212                 __u32 stripes_per_unit;
2213                 __u32 blocks_per_unit;
2214                 __u32 parity_depth;
2215                 __u32 migr_chunk;
2216                 __u32 block_map;
2217                 __u32 block_rel;
2218                 __u32 segment;
2219                 __u32 stripe;
2220                 __u8  disks;
2221
2222                 /* yes, this is really the translation of migr_units to
2223                  * per-member blocks in the 'resync' case
2224                  */
2225                 stripes_per_unit = num_stripes_per_unit_resync(dev);
2226                 migr_chunk = migr_strip_blocks_resync(dev);
2227                 disks = imsm_num_data_members(dev, MAP_0);
2228                 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
2229                 stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
2230                 segment = blocks_per_unit / stripe;
2231                 block_rel = blocks_per_unit - segment * stripe;
2232                 parity_depth = parity_segment_depth(dev);
2233                 block_map = map_migr_block(dev, block_rel);
2234                 return block_map + parity_depth * segment;
2235         }
2236         case MIGR_REBUILD: {
2237                 __u32 stripes_per_unit;
2238                 __u32 migr_chunk;
2239
2240                 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
2241                 migr_chunk = migr_strip_blocks_rebuild(dev);
2242                 return migr_chunk * stripes_per_unit;
2243         }
2244         case MIGR_STATE_CHANGE:
2245         default:
2246                 return 0;
2247         }
2248 }
2249
2250 static int imsm_level_to_layout(int level)
2251 {
2252         switch (level) {
2253         case 0:
2254         case 1:
2255                 return 0;
2256         case 5:
2257         case 6:
2258                 return ALGORITHM_LEFT_ASYMMETRIC;
2259         case 10:
2260                 return 0x102;
2261         }
2262         return UnSet;
2263 }
2264
2265 /*******************************************************************************
2266  * Function:    read_imsm_migr_rec
2267  * Description: Function reads imsm migration record from last sector of disk
2268  * Parameters:
2269  *      fd      : disk descriptor
2270  *      super   : metadata info
2271  * Returns:
2272  *       0 : success,
2273  *      -1 : fail
2274  ******************************************************************************/
2275 static int read_imsm_migr_rec(int fd, struct intel_super *super)
2276 {
2277         int ret_val = -1;
2278         unsigned long long dsize;
2279
2280         get_dev_size(fd, NULL, &dsize);
2281         if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
2282                 pr_err("Cannot seek to anchor block: %s\n",
2283                        strerror(errno));
2284                 goto out;
2285         }
2286         if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
2287                                                             MIGR_REC_BUF_SIZE) {
2288                 pr_err("Cannot read migr record block: %s\n",
2289                        strerror(errno));
2290                 goto out;
2291         }
2292         ret_val = 0;
2293
2294 out:
2295         return ret_val;
2296 }
2297
2298 static struct imsm_dev *imsm_get_device_during_migration(
2299         struct intel_super *super)
2300 {
2301
2302         struct intel_dev *dv;
2303
2304         for (dv = super->devlist; dv; dv = dv->next) {
2305                 if (is_gen_migration(dv->dev))
2306                         return dv->dev;
2307         }
2308         return NULL;
2309 }
2310
2311 /*******************************************************************************
2312  * Function:    load_imsm_migr_rec
2313  * Description: Function reads imsm migration record (it is stored at the last
2314  *              sector of disk)
2315  * Parameters:
2316  *      super   : imsm internal array info
2317  *      info    : general array info
2318  * Returns:
2319  *       0 : success
2320  *      -1 : fail
2321  *      -2 : no migration in progress
2322  ******************************************************************************/
2323 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
2324 {
2325         struct mdinfo *sd;
2326         struct dl *dl = NULL;
2327         char nm[30];
2328         int retval = -1;
2329         int fd = -1;
2330         struct imsm_dev *dev;
2331         struct imsm_map *map = NULL;
2332         int slot = -1;
2333
2334         /* find map under migration */
2335         dev = imsm_get_device_during_migration(super);
2336         /* nothing to load,no migration in progress?
2337         */
2338         if (dev == NULL)
2339                 return -2;
2340         map = get_imsm_map(dev, MAP_0);
2341
2342         if (info) {
2343                 for (sd = info->devs ; sd ; sd = sd->next) {
2344                         /* skip spare and failed disks
2345                          */
2346                         if (sd->disk.raid_disk < 0)
2347                                 continue;
2348                         /* read only from one of the first two slots */
2349                         if (map)
2350                                 slot = get_imsm_disk_slot(map,
2351                                                           sd->disk.raid_disk);
2352                         if ((map == NULL) || (slot > 1) || (slot < 0))
2353                                 continue;
2354
2355                         sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
2356                         fd = dev_open(nm, O_RDONLY);
2357                         if (fd >= 0)
2358                                 break;
2359                 }
2360         }
2361         if (fd < 0) {
2362                 for (dl = super->disks; dl; dl = dl->next) {
2363                         /* skip spare and failed disks
2364                         */
2365                         if (dl->index < 0)
2366                                 continue;
2367                         /* read only from one of the first two slots */
2368                         if (map)
2369                                 slot = get_imsm_disk_slot(map, dl->index);
2370                         if ((map == NULL) || (slot > 1) || (slot < 0))
2371                                 continue;
2372                         sprintf(nm, "%d:%d", dl->major, dl->minor);
2373                         fd = dev_open(nm, O_RDONLY);
2374                         if (fd >= 0)
2375                                 break;
2376                 }
2377         }
2378         if (fd < 0)
2379                 goto out;
2380         retval = read_imsm_migr_rec(fd, super);
2381
2382 out:
2383         if (fd >= 0)
2384                 close(fd);
2385         return retval;
2386 }
2387
2388 #ifndef MDASSEMBLE
2389 /*******************************************************************************
2390  * function: imsm_create_metadata_checkpoint_update
2391  * Description: It creates update for checkpoint change.
2392  * Parameters:
2393  *      super   : imsm internal array info
2394  *      u       : pointer to prepared update
2395  * Returns:
2396  *      Uptate length.
2397  *      If length is equal to 0, input pointer u contains no update
2398  ******************************************************************************/
2399 static int imsm_create_metadata_checkpoint_update(
2400         struct intel_super *super,
2401         struct imsm_update_general_migration_checkpoint **u)
2402 {
2403
2404         int update_memory_size = 0;
2405
2406         dprintf("imsm_create_metadata_checkpoint_update(enter)\n");
2407
2408         if (u == NULL)
2409                 return 0;
2410         *u = NULL;
2411
2412         /* size of all update data without anchor */
2413         update_memory_size =
2414                 sizeof(struct imsm_update_general_migration_checkpoint);
2415
2416         *u = xcalloc(1, update_memory_size);
2417         if (*u == NULL) {
2418                 dprintf("error: cannot get memory for "
2419                         "imsm_create_metadata_checkpoint_update update\n");
2420                 return 0;
2421         }
2422         (*u)->type = update_general_migration_checkpoint;
2423         (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
2424         dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n",
2425                 (*u)->curr_migr_unit);
2426
2427         return update_memory_size;
2428 }
2429
2430
2431 static void imsm_update_metadata_locally(struct supertype *st,
2432                                          void *buf, int len);
2433
2434 /*******************************************************************************
2435  * Function:    write_imsm_migr_rec
2436  * Description: Function writes imsm migration record
2437  *              (at the last sector of disk)
2438  * Parameters:
2439  *      super   : imsm internal array info
2440  * Returns:
2441  *       0 : success
2442  *      -1 : if fail
2443  ******************************************************************************/
2444 static int write_imsm_migr_rec(struct supertype *st)
2445 {
2446         struct intel_super *super = st->sb;
2447         unsigned long long dsize;
2448         char nm[30];
2449         int fd = -1;
2450         int retval = -1;
2451         struct dl *sd;
2452         int len;
2453         struct imsm_update_general_migration_checkpoint *u;
2454         struct imsm_dev *dev;
2455         struct imsm_map *map = NULL;
2456
2457         /* find map under migration */
2458         dev = imsm_get_device_during_migration(super);
2459         /* if no migration, write buffer anyway to clear migr_record
2460          * on disk based on first available device
2461         */
2462         if (dev == NULL)
2463                 dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
2464                                           super->current_vol);
2465
2466         map = get_imsm_map(dev, MAP_0);
2467
2468         for (sd = super->disks ; sd ; sd = sd->next) {
2469                 int slot = -1;
2470
2471                 /* skip failed and spare devices */
2472                 if (sd->index < 0)
2473                         continue;
2474                 /* write to 2 first slots only */
2475                 if (map)
2476                         slot = get_imsm_disk_slot(map, sd->index);
2477                 if ((map == NULL) || (slot > 1) || (slot < 0))
2478                         continue;
2479
2480                 sprintf(nm, "%d:%d", sd->major, sd->minor);
2481                 fd = dev_open(nm, O_RDWR);
2482                 if (fd < 0)
2483                         continue;
2484                 get_dev_size(fd, NULL, &dsize);
2485                 if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
2486                         pr_err("Cannot seek to anchor block: %s\n",
2487                                strerror(errno));
2488                         goto out;
2489                 }
2490                 if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
2491                                                             MIGR_REC_BUF_SIZE) {
2492                         pr_err("Cannot write migr record block: %s\n",
2493                                strerror(errno));
2494                         goto out;
2495                 }
2496                 close(fd);
2497                 fd = -1;
2498         }
2499         /* update checkpoint information in metadata */
2500         len = imsm_create_metadata_checkpoint_update(super, &u);
2501
2502         if (len <= 0) {
2503                 dprintf("imsm: Cannot prepare update\n");
2504                 goto out;
2505         }
2506         /* update metadata locally */
2507         imsm_update_metadata_locally(st, u, len);
2508         /* and possibly remotely */
2509         if (st->update_tail) {
2510                 append_metadata_update(st, u, len);
2511                 /* during reshape we do all work inside metadata handler
2512                  * manage_reshape(), so metadata update has to be triggered
2513                  * insida it
2514                  */
2515                 flush_metadata_updates(st);
2516                 st->update_tail = &st->updates;
2517         } else
2518                 free(u);
2519
2520         retval = 0;
2521  out:
2522         if (fd >= 0)
2523                 close(fd);
2524         return retval;
2525 }
2526 #endif /* MDASSEMBLE */
2527
2528 /* spare/missing disks activations are not allowe when
2529  * array/container performs reshape operation, because
2530  * all arrays in container works on the same disks set
2531  */
2532 int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
2533 {
2534         int rv = 0;
2535         struct intel_dev *i_dev;
2536         struct imsm_dev *dev;
2537
2538         /* check whole container
2539          */
2540         for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
2541                 dev = i_dev->dev;
2542                 if (is_gen_migration(dev)) {
2543                         /* No repair during any migration in container
2544                          */
2545                         rv = 1;
2546                         break;
2547                 }
2548         }
2549         return rv;
2550 }
2551 static unsigned long long imsm_component_size_aligment_check(int level,
2552                                               int chunk_size,
2553                                               unsigned long long component_size)
2554 {
2555         unsigned int component_size_alligment;
2556
2557         /* check component size aligment
2558         */
2559         component_size_alligment = component_size % (chunk_size/512);
2560
2561         dprintf("imsm_component_size_aligment_check(Level: %i, "
2562                 "chunk_size = %i, component_size = %llu), "
2563                 "component_size_alligment = %u\n",
2564                 level, chunk_size, component_size,
2565                 component_size_alligment);
2566
2567         if (component_size_alligment && (level != 1) && (level != UnSet)) {
2568                 dprintf("imsm: reported component size alligned from %llu ",
2569                         component_size);
2570                 component_size -= component_size_alligment;
2571                 dprintf("to %llu (%i).\n",
2572                         component_size, component_size_alligment);
2573         }
2574
2575         return component_size;
2576 }
2577
/* Fill 'info' with a description of the volume selected by
 * super->current_vol.
 *
 * st   : supertype; st->sb is the loaded struct intel_super
 * info : output.  It is zeroed here and then populated.  The only field read
 *        on entry is info->array.raid_disks (see map_disks below).
 * dmap : optional per-slot array.  When non-NULL, entries 0..map_disks-1 are
 *        written: 1 when the disk in that slot has CONFIGURED_DISK set in
 *        its status, 0 otherwise (including slots beyond the volume's
 *        raid_disks).
 */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
        struct intel_super *super = st->sb;
        struct migr_record *migr_rec = super->migr_rec;
        struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
        struct imsm_map *map = get_imsm_map(dev, MAP_0);
        struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
        struct imsm_map *map_to_analyse = map;
        struct dl *dl;
        /* Must be sampled before the memset below wipes 'info'; it bounds the
         * dmap loop at the end.  Presumably the caller stores the number of
         * dmap entries here - verify against callers.
         */
        int map_disks = info->array.raid_disks;

        memset(info, 0, sizeof(*info));
        /* when a second map (MAP_1) exists, level, chunk size, data offset,
         * component size and disk slot are taken from it instead of MAP_0
         */
        if (prev_map)
                map_to_analyse = prev_map;

        dl = super->current_disk;

        info->container_member    = super->current_vol;
        info->array.raid_disks    = map->num_members;
        info->array.level         = get_imsm_raid_level(map_to_analyse);
        info->array.layout        = imsm_level_to_layout(info->array.level);
        info->array.md_minor      = -1;
        info->array.ctime         = 0;
        info->array.utime         = 0;
        /* blocks_per_strip is shifted by 9, i.e. multiplied by 512 */
        info->array.chunk_size    =
                __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
        info->array.state         = !dev->vol.dirty;
        /* 64-bit array size assembled from the two 32-bit halves */
        info->custom_array_size   = __le32_to_cpu(dev->size_high);
        info->custom_array_size   <<= 32;
        info->custom_array_size   |= __le32_to_cpu(dev->size_low);
        info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);

        if (is_gen_migration(dev)) {
                /* new_* describe MAP_0, while array.* above were taken from
                 * map_to_analyse.
                 * NOTE(review): prev_map is dereferenced without a NULL
                 * check; presumably a general migration always has a second
                 * map - confirm.
                 */
                info->reshape_active = 1;
                info->new_level = get_imsm_raid_level(map);
                info->new_layout = imsm_level_to_layout(info->new_level);
                info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
                info->delta_disks = map->num_members - prev_map->num_members;
                if (info->delta_disks) {
                        /* this needs to be applied to every array
                         * in the container.
                         */
                        info->reshape_active = CONTAINER_RESHAPE;
                }
                /* The shape information that we give to md might have to be
                 * modified to cope with md's requirement for reshaping arrays.
                 * For example, when reshaping a RAID0, md requires it to be
                 * presented as a degraded RAID4.
                 * Also if a RAID0 is migrating to a RAID5 we need to specify
                 * the array as already being RAID5, but the 'before' layout
                 * is a RAID4-like layout.
                 */
                /* only a source level of 0 is adjusted; other levels are
                 * left as computed above
                 */
                switch (info->array.level) {
                case 0:
                        switch(info->new_level) {
                        case 0:
                                /* conversion is happening as RAID4 */
                                info->array.level = 4;
                                info->array.raid_disks += 1;
                                break;
                        case 5:
                                /* conversion is happening as RAID5 */
                                info->array.level = 5;
                                info->array.layout = ALGORITHM_PARITY_N;
                                info->delta_disks -= 1;
                                break;
                        default:
                                /* FIXME error message */
                                info->array.level = UnSet;
                                break;
                        }
                        break;
                }
        } else {
                info->new_level = UnSet;
                info->new_layout = UnSet;
                info->new_chunk = info->array.chunk_size;
                info->delta_disks = 0;
        }

        /* describe the currently selected disk, if there is one */
        if (dl) {
                info->disk.major = dl->major;
                info->disk.minor = dl->minor;
                info->disk.number = dl->index;
                info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
                                                          dl->index);
        }

        info->data_offset         = pba_of_lba0(map_to_analyse);
        info->component_size      = blocks_per_member(map_to_analyse);

        /* uses the (possibly adjusted) level and chunk size set above */
        info->component_size = imsm_component_size_aligment_check(
                                                        info->array.level,
                                                        info->array.chunk_size,
                                                        info->component_size);

        memset(info->uuid, 0, sizeof(info->uuid));
        info->recovery_start = MaxSector;

        info->reshape_progress = 0;
        info->resync_start = MaxSector;
        /* resync starts from 0 for an uninitialized or dirty volume, but
         * only when no volume in the container is under general migration
         */
        if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
            dev->vol.dirty) &&
            imsm_reshape_blocks_arrays_changes(super) == 0) {
                info->resync_start = 0;
        }
        if (dev->vol.migr_state) {
                switch (migr_type(dev)) {
                case MIGR_REPAIR:
                case MIGR_INIT: {
                        /* checkpoint comes from the volume itself */
                        __u64 blocks_per_unit = blocks_per_migr_unit(super,
                                                                     dev);
                        __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);

                        info->resync_start = blocks_per_unit * units;
                        break;
                }
                case MIGR_GEN_MIGR: {
                        /* checkpoint comes from the migration record */
                        __u64 blocks_per_unit = blocks_per_migr_unit(super,
                                                                     dev);
                        __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
                        unsigned long long array_blocks;
                        int used_disks;

                        /* count one more unit when the migration is
                         * ascending, this is not the last unit, and the
                         * record status is UNIT_SRC_IN_CP_AREA
                         */
                        if (__le32_to_cpu(migr_rec->ascending_migr) &&
                            (units <
                                (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
                            (super->migr_rec->rec_status ==
                                        __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
                                units++;

                        info->reshape_progress = blocks_per_unit * units;

                        dprintf("IMSM: General Migration checkpoint : %llu "
                               "(%llu) -> read reshape progress : %llu\n",
                                (unsigned long long)units,
                                (unsigned long long)blocks_per_unit,
                                info->reshape_progress);

                        used_disks = imsm_num_data_members(dev, MAP_1);
                        if (used_disks > 0) {
                                array_blocks = blocks_per_member(map) *
                                        used_disks;
                                /* round array size down to closest MB
                                 */
                                info->custom_array_size = (array_blocks
                                                >> SECT_PER_MB_SHIFT)
                                                << SECT_PER_MB_SHIFT;
                        }
                }
                /* no break above: MIGR_GEN_MIGR also runs the shared
                 * default below, which sets resync_start to MaxSector
                 */
                case MIGR_VERIFY:
                        /* we could emulate the checkpointing of
                         * 'sync_action=check' migrations, but for now
                         * we just immediately complete them
                         */
                case MIGR_REBUILD:
                        /* this is handled by container_content_imsm() */
                case MIGR_STATE_CHANGE:
                        /* FIXME handle other migrations */
                default:
                        /* we are not dirty, so... */
                        info->resync_start = MaxSector;
                }
        }

        /* copy at most MAX_RAID_SERIAL_LEN bytes of the volume name and
         * terminate explicitly, since strncpy may not
         */
        strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
        info->name[MAX_RAID_SERIAL_LEN] = 0;

        info->array.major_version = -1;
        info->array.minor_version = -2;
        sprintf(info->text_version, "/%s/%d", st->container_devnm, info->container_member);
        info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
        uuid_from_super_imsm(st, info->uuid);

        if (dmap) {
                int i, j;
                /* map_disks is the value sampled on entry, not the
                 * raid_disks computed above
                 */
                for (i=0; i<map_disks; i++) {
                        dmap[i] = 0;
                        if (i < info->array.raid_disks) {
                                struct imsm_disk *dsk;
                                j = get_imsm_disk_idx(dev, i, MAP_X);
                                dsk = get_imsm_disk(super, j);
                                if (dsk && (dsk->status & CONFIGURED_DISK))
                                        dmap[i] = 1;
                        }
                }
        }
}
2766
2767 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
2768                                 int failed, int look_in_map);
2769
2770 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
2771                              int look_in_map);
2772
2773
2774 #ifndef MDASSEMBLE
2775 static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
2776 {
2777         if (is_gen_migration(dev)) {
2778                 int failed;
2779                 __u8 map_state;
2780                 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
2781
2782                 failed = imsm_count_failed(super, dev, MAP_1);
2783                 map_state = imsm_check_degraded(super, dev, failed, MAP_1);
2784                 if (map2->map_state != map_state) {
2785                         map2->map_state = map_state;
2786                         super->updates_pending++;
2787                 }
2788         }
2789 }
2790 #endif
2791
2792 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2793 {
2794         struct dl *d;
2795
2796         for (d = super->missing; d; d = d->next)
2797                 if (d->index == index)
2798                         return &d->disk;
2799         return NULL;
2800 }
2801
2802 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
2803 {
2804         struct intel_super *super = st->sb;
2805         struct imsm_disk *disk;
2806         int map_disks = info->array.raid_disks;
2807         int max_enough = -1;
2808         int i;
2809         struct imsm_super *mpb;
2810
2811         if (super->current_vol >= 0) {
2812                 getinfo_super_imsm_volume(st, info, map);
2813                 return;
2814         }
2815         memset(info, 0, sizeof(*info));
2816
2817         /* Set raid_disks to zero so that Assemble will always pull in valid
2818          * spares
2819          */
2820         info->array.raid_disks    = 0;
2821         info->array.level         = LEVEL_CONTAINER;
2822         info->array.layout        = 0;
2823         info->array.md_minor      = -1;
2824         info->array.ctime         = 0; /* N/A for imsm */
2825         info->array.utime         = 0;
2826         info->array.chunk_size    = 0;
2827
2828         info->disk.major = 0;
2829         info->disk.minor = 0;
2830         info->disk.raid_disk = -1;
2831         info->reshape_active = 0;
2832         info->array.major_version = -1;
2833         info->array.minor_version = -2;
2834         strcpy(info->text_version, "imsm");
2835         info->safe_mode_delay = 0;
2836         info->disk.number = -1;
2837         info->disk.state = 0;
2838         info->name[0] = 0;
2839         info->recovery_start = MaxSector;
2840         info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
2841
2842         /* do we have the all the insync disks that we expect? */
2843         mpb = super->anchor;
2844
2845         for (i = 0; i < mpb->num_raid_devs; i++) {
2846                 struct imsm_dev *dev = get_imsm_dev(super, i);
2847                 int failed, enough, j, missing = 0;
2848                 struct imsm_map *map;
2849                 __u8 state;
2850
2851                 failed = imsm_count_failed(super, dev, MAP_0);
2852                 state = imsm_check_degraded(super, dev, failed, MAP_0);
2853                 map = get_imsm_map(dev, MAP_0);
2854
2855                 /* any newly missing disks?
2856                  * (catches single-degraded vs double-degraded)
2857                  */
2858                 for (j = 0; j < map->num_members; j++) {
2859                         __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
2860                         __u32 idx = ord_to_idx(ord);
2861
2862                         if (!(ord & IMSM_ORD_REBUILD) &&
2863                             get_imsm_missing(super, idx)) {
2864                                 missing = 1;
2865                                 break;
2866                         }
2867                 }
2868
2869                 if (state == IMSM_T_STATE_FAILED)
2870                         enough = -1;
2871                 else if (state == IMSM_T_STATE_DEGRADED &&
2872                          (state != map->map_state || missing))
2873                         enough = 0;
2874                 else /* we're normal, or already degraded */
2875                         enough = 1;
2876                 if (is_gen_migration(dev) && missing) {
2877                         /* during general migration we need all disks
2878                          * that process is running on.
2879                          * No new missing disk is allowed.
2880                          */
2881                         max_enough = -1;
2882                         enough = -1;
2883                         /* no more checks necessary
2884                          */
2885                         break;
2886                 }
2887                 /* in the missing/failed disk case check to see
2888                  * if at least one array is runnable
2889                  */
2890                 max_enough = max(max_enough, enough);
2891         }
2892         dprintf("%s: enough: %d\n", __func__, max_enough);
2893         info->container_enough = max_enough;
2894
2895         if (super->disks) {
2896                 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2897
2898                 disk = &super->disks->disk;
2899                 info->data_offset = total_blocks(&super->disks->disk) - reserved;
2900                 info->component_size = reserved;
2901                 info->disk.state  = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
2902                 /* we don't change info->disk.raid_disk here because
2903                  * this state will be finalized in mdmon after we have
2904                  * found the 'most fresh' version of the metadata
2905                  */
2906                 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2907                 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2908         }
2909
2910         /* only call uuid_from_super_imsm when this disk is part of a populated container,
2911          * ->compare_super may have updated the 'num_raid_devs' field for spares
2912          */
2913         if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
2914                 uuid_from_super_imsm(st, info->uuid);
2915         else
2916                 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
2917
2918         /* I don't know how to compute 'map' on imsm, so use safe default */
2919         if (map) {
2920                 int i;
2921                 for (i = 0; i < map_disks; i++)
2922                         map[i] = 1;
2923         }
2924
2925 }
2926
2927 /* allocates memory and fills disk in mdinfo structure
2928  * for each disk in array */
2929 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2930 {
2931         struct mdinfo *mddev = NULL;
2932         struct intel_super *super = st->sb;
2933         struct imsm_disk *disk;
2934         int count = 0;
2935         struct dl *dl;
2936         if (!super || !super->disks)
2937                 return NULL;
2938         dl = super->disks;
2939         mddev = xcalloc(1, sizeof(*mddev));
2940         while (dl) {
2941                 struct mdinfo *tmp;
2942                 disk = &dl->disk;
2943                 tmp = xcalloc(1, sizeof(*tmp));
2944                 if (mddev->devs)
2945                         tmp->next = mddev->devs;
2946                 mddev->devs = tmp;
2947                 tmp->disk.number = count++;
2948                 tmp->disk.major = dl->major;
2949                 tmp->disk.minor = dl->minor;
2950                 tmp->disk.state = is_configured(disk) ?
2951                                   (1 << MD_DISK_ACTIVE) : 0;
2952                 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2953                 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2954                 tmp->disk.raid_disk = -1;
2955                 dl = dl->next;
2956         }
2957         return mddev;
2958 }
2959
2960 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
2961                              char *update, char *devname, int verbose,
2962                              int uuid_set, char *homehost)
2963 {
2964         /* For 'assemble' and 'force' we need to return non-zero if any
2965          * change was made.  For others, the return value is ignored.
2966          * Update options are:
2967          *  force-one : This device looks a bit old but needs to be included,
2968          *        update age info appropriately.
2969          *  assemble: clear any 'faulty' flag to allow this device to
2970          *              be assembled.
2971          *  force-array: Array is degraded but being forced, mark it clean
2972          *         if that will be needed to assemble it.
2973          *
2974          *  newdev:  not used ????
2975          *  grow:  Array has gained a new device - this is currently for
2976          *              linear only
2977          *  resync: mark as dirty so a resync will happen.
2978          *  name:  update the name - preserving the homehost
2979          *  uuid:  Change the uuid of the array to match watch is given
2980          *
2981          * Following are not relevant for this imsm:
2982          *  sparc2.2 : update from old dodgey metadata
2983          *  super-minor: change the preferred_minor number
2984          *  summaries:  update redundant counters.
2985          *  homehost:  update the recorded homehost
2986          *  _reshape_progress: record new reshape_progress position.
2987          */
2988         int rv = 1;
2989         struct intel_super *super = st->sb;
2990         struct imsm_super *mpb;
2991
2992         /* we can only update container info */
2993         if (!super || super->current_vol >= 0 || !super->anchor)
2994                 return 1;
2995
2996         mpb = super->anchor;
2997
2998         if (strcmp(update, "uuid") == 0) {
2999                 /* We take this to mean that the family_num should be updated.
3000                  * However that is much smaller than the uuid so we cannot really
3001                  * allow an explicit uuid to be given.  And it is hard to reliably
3002                  * know if one was.
3003                  * So if !uuid_set we know the current uuid is random and just used
3004                  * the first 'int' and copy it to the other 3 positions.
3005                  * Otherwise we require the 4 'int's to be the same as would be the
3006                  * case if we are using a random uuid.  So an explicit uuid will be
3007                  * accepted as long as all for ints are the same... which shouldn't hurt
3008                  */
3009                 if (!uuid_set) {
3010                         info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
3011                         rv = 0;
3012                 } else {
3013                         if (info->uuid[0] != info->uuid[1] ||
3014                             info->uuid[1] != info->uuid[2] ||
3015                             info->uuid[2] != info->uuid[3])
3016                                 rv = -1;
3017                         else
3018                                 rv = 0;
3019                 }
3020                 if (rv == 0)
3021                         mpb->orig_family_num = info->uuid[0];
3022         } else if (strcmp(update, "assemble") == 0)
3023                 rv = 0;
3024         else
3025                 rv = -1;
3026
3027         /* successful update? recompute checksum */
3028         if (rv == 0)
3029                 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
3030
3031         return rv;
3032 }
3033
3034 static size_t disks_to_mpb_size(int disks)
3035 {
3036         size_t size;
3037
3038         size = sizeof(struct imsm_super);
3039         size += (disks - 1) * sizeof(struct imsm_disk);
3040         size += 2 * sizeof(struct imsm_dev);
3041         /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
3042         size += (4 - 2) * sizeof(struct imsm_map);
3043         /* 4 possible disk_ord_tbl's */
3044         size += 4 * (disks - 1) * sizeof(__u32);
3045
3046         return size;
3047 }
3048
3049 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize,
3050                              unsigned long long data_offset)
3051 {
3052         if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
3053                 return 0;
3054
3055         return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
3056 }
3057
3058 static void free_devlist(struct intel_super *super)
3059 {
3060         struct intel_dev *dv;
3061
3062         while (super->devlist) {
3063                 dv = super->devlist->next;
3064                 free(super->devlist->dev);
3065                 free(super->devlist);
3066                 super->devlist = dv;
3067         }
3068 }
3069
/* imsm_copy_dev - copy 'src' into 'dest'; the byte count is whatever
 * sizeof_imsm_dev(src, 0) reports, so 'dest' must be at least that large.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
3074
3075 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
3076 {
3077         /*
3078          * return:
3079          *  0 same, or first was empty, and second was copied
3080          *  1 second had wrong number
3081          *  2 wrong uuid
3082          *  3 wrong other info
3083          */
3084         struct intel_super *first = st->sb;
3085         struct intel_super *sec = tst->sb;
3086
3087         if (!first) {
3088                 st->sb = tst->sb;
3089                 tst->sb = NULL;
3090                 return 0;
3091         }
3092         /* in platform dependent environment test if the disks
3093          * use the same Intel hba
3094          * If not on Intel hba at all, allow anything.
3095          */
3096         if (!check_env("IMSM_NO_PLATFORM")) {
3097                 if (first->hba && sec->hba &&
3098                     strcmp(first->hba->path, sec->hba->path) != 0)  {
3099                         fprintf(stderr,
3100                                 "HBAs of devices does not match %s != %s\n",
3101                                 first->hba ? first->hba->path : NULL,
3102                                 sec->hba ? sec->hba->path : NULL);
3103                         return 3;
3104                 }
3105         }
3106
3107         /* if an anchor does not have num_raid_devs set then it is a free
3108          * floating spare
3109          */
3110         if (first->anchor->num_raid_devs > 0 &&
3111             sec->anchor->num_raid_devs > 0) {
3112                 /* Determine if these disks might ever have been
3113                  * related.  Further disambiguation can only take place
3114                  * in load_super_imsm_all
3115                  */
3116                 __u32 first_family = first->anchor->orig_family_num;
3117                 __u32 sec_family = sec->anchor->orig_family_num;
3118
3119                 if (memcmp(first->anchor->sig, sec->anchor->sig,
3120                            MAX_SIGNATURE_LENGTH) != 0)
3121                         return 3;
3122
3123                 if (first_family == 0)
3124                         first_family = first->anchor->family_num;
3125                 if (sec_family == 0)
3126                         sec_family = sec->anchor->family_num;
3127
3128                 if (first_family != sec_family)
3129                         return 3;
3130
3131         }
3132
3133
3134         /* if 'first' is a spare promote it to a populated mpb with sec's
3135          * family number
3136          */
3137         if (first->anchor->num_raid_devs == 0 &&
3138             sec->anchor->num_raid_devs > 0) {
3139                 int i;
3140                 struct intel_dev *dv;
3141                 struct imsm_dev *dev;
3142
3143                 /* we need to copy raid device info from sec if an allocation
3144                  * fails here we don't associate the spare
3145                  */
3146                 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
3147                         dv = xmalloc(sizeof(*dv));
3148                         dev = xmalloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
3149                         dv->dev = dev;
3150                         dv->index = i;
3151                         dv->next = first->devlist;
3152                         first->devlist = dv;
3153                 }
3154                 if (i < sec->anchor->num_raid_devs) {
3155                         /* allocation failure */
3156                         free_devlist(first);
3157                         fprintf(stderr, "imsm: failed to associate spare\n");
3158                         return 3;
3159                 }
3160                 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
3161                 first->anchor->orig_family_num = sec->anchor->orig_family_num;
3162                 first->anchor->family_num = sec->anchor->family_num;
3163                 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
3164                 for (i = 0; i < sec->anchor->num_raid_devs; i++)
3165                         imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
3166         }
3167
3168         return 0;
3169 }
3170
3171 static void fd2devname(int fd, char *name)
3172 {
3173         struct stat st;
3174         char path[256];
3175         char dname[PATH_MAX];
3176         char *nm;
3177         int rv;
3178
3179         name[0] = '\0';
3180         if (fstat(fd, &st) != 0)
3181                 return;
3182         sprintf(path, "/sys/dev/block/%d:%d",
3183                 major(st.st_rdev), minor(st.st_rdev));
3184
3185         rv = readlink(path, dname, sizeof(dname)-1);
3186         if (rv <= 0)
3187                 return;
3188
3189         dname[rv] = '\0';
3190         nm = strrchr(dname, '/');
3191         if (nm) {
3192                 nm++;
3193                 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
3194         }
3195 }
3196
3197 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
3198
3199 static int imsm_read_serial(int fd, char *devname,
3200                             __u8 serial[MAX_RAID_SERIAL_LEN])
3201 {
3202         unsigned char scsi_serial[255];
3203         int rv;
3204         int rsp_len;
3205         int len;
3206         char *dest;
3207         char *src;
3208         char *rsp_buf;
3209         int i;
3210
3211         memset(scsi_serial, 0, sizeof(scsi_serial));
3212
3213         rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
3214
3215         if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
3216                 memset(serial, 0, MAX_RAID_SERIAL_LEN);
3217                 fd2devname(fd, (char *) serial);
3218                 return 0;
3219         }
3220
3221         if (rv != 0) {
3222                 if (devname)
3223                         pr_err("Failed to retrieve serial for %s\n",
3224                                devname);
3225                 return rv;
3226         }
3227
3228         rsp_len = scsi_serial[3];
3229         if (!rsp_len) {
3230                 if (devname)
3231                         pr_err("Failed to retrieve serial for %s\n",
3232                                devname);
3233                 return 2;
3234         }
3235         rsp_buf = (char *) &scsi_serial[4];
3236
3237         /* trim all whitespace and non-printable characters and convert
3238          * ':' to ';'
3239          */
3240         for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
3241                 src = &rsp_buf[i];
3242                 if (*src > 0x20) {
3243                         /* ':' is reserved for use in placeholder serial
3244                          * numbers for missing disks
3245                          */
3246                         if (*src == ':')
3247                                 *dest++ = ';';
3248                         else
3249                                 *dest++ = *src;
3250                 }
3251         }
3252         len = dest - rsp_buf;
3253         dest = rsp_buf;
3254
3255         /* truncate leading characters */
3256         if (len > MAX_RAID_SERIAL_LEN) {
3257                 dest += len - MAX_RAID_SERIAL_LEN;
3258                 len = MAX_RAID_SERIAL_LEN;
3259         }
3260
3261         memset(serial, 0, MAX_RAID_SERIAL_LEN);
3262         memcpy(serial, dest, len);
3263
3264         return 0;
3265 }
3266
3267 static int serialcmp(__u8 *s1, __u8 *s2)
3268 {
3269         return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
3270 }
3271
3272 static void serialcpy(__u8 *dest, __u8 *src)
3273 {
3274         strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
3275 }
3276
3277 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
3278 {
3279         struct dl *dl;
3280
3281         for (dl = super->disks; dl; dl = dl->next)
3282                 if (serialcmp(dl->serial, serial) == 0)
3283                         break;
3284
3285         return dl;
3286 }
3287
3288 static struct imsm_disk *
3289 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
3290 {
3291         int i;
3292
3293         for (i = 0; i < mpb->num_disks; i++) {
3294                 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3295
3296                 if (serialcmp(disk->serial, serial) == 0) {
3297                         if (idx)
3298                                 *idx = i;
3299                         return disk;
3300                 }
3301         }
3302
3303         return NULL;
3304 }
3305
3306 static int
3307 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
3308 {
3309         struct imsm_disk *disk;
3310         struct dl *dl;
3311         struct stat stb;
3312         int rv;
3313         char name[40];
3314         __u8 serial[MAX_RAID_SERIAL_LEN];
3315
3316         rv = imsm_read_serial(fd, devname, serial);
3317
3318         if (rv != 0)
3319                 return 2;
3320
3321         dl = xcalloc(1, sizeof(*dl));
3322
3323         fstat(fd, &stb);
3324         dl->major = major(stb.st_rdev);
3325         dl->minor = minor(stb.st_rdev);
3326         dl->next = super->disks;
3327         dl->fd = keep_fd ? fd : -1;
3328         assert(super->disks == NULL);
3329         super->disks = dl;
3330         serialcpy(dl->serial, serial);
3331         dl->index = -2;
3332         dl->e = NULL;
3333         fd2devname(fd, name);
3334         if (devname)
3335                 dl->devname = xstrdup(devname);
3336         else
3337                 dl->devname = xstrdup(name);
3338
3339         /* look up this disk's index in the current anchor */
3340         disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
3341         if (disk) {
3342                 dl->disk = *disk;
3343                 /* only set index on disks that are a member of a
3344                  * populated contianer, i.e. one with raid_devs
3345                  */
3346                 if (is_failed(&dl->disk))
3347                         dl->index = -2;
3348                 else if (is_spare(&dl->disk))
3349                         dl->index = -1;
3350         }
3351
3352         return 0;
3353 }
3354
3355 #ifndef MDASSEMBLE
3356 /* When migrating map0 contains the 'destination' state while map1
3357  * contains the current state.  When not migrating map0 contains the
3358  * current state.  This routine assumes that map[0].map_state is set to
3359  * the current array state before being called.
3360  *
3361  * Migration is indicated by one of the following states
3362  * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
3363  * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
3364  *    map1state=unitialized)
3365  * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR  map0state=normal
3366  *    map1state=normal)
3367  * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
3368  *    map1state=degraded)
3369  * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
3370  *    map1state=normal)
3371  */
3372 static void migrate(struct imsm_dev *dev, struct intel_super *super,
3373                     __u8 to_state, int migr_type)
3374 {
3375         struct imsm_map *dest;
3376         struct imsm_map *src = get_imsm_map(dev, MAP_0);
3377
3378         dev->vol.migr_state = 1;
3379         set_migr_type(dev, migr_type);
3380         dev->vol.curr_migr_unit = 0;
3381         dest = get_imsm_map(dev, MAP_1);
3382
3383         /* duplicate and then set the target end state in map[0] */
3384         memcpy(dest, src, sizeof_imsm_map(src));
3385         if ((migr_type == MIGR_REBUILD) ||
3386             (migr_type ==  MIGR_GEN_MIGR)) {
3387                 __u32 ord;
3388                 int i;
3389
3390                 for (i = 0; i < src->num_members; i++) {
3391                         ord = __le32_to_cpu(src->disk_ord_tbl[i]);
3392                         set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
3393                 }
3394         }
3395
3396         if (migr_type == MIGR_GEN_MIGR)
3397                 /* Clear migration record */
3398                 memset(super->migr_rec, 0, sizeof(struct migr_record));
3399
3400         src->map_state = to_state;
3401 }
3402
3403 static void end_migration(struct imsm_dev *dev, struct intel_super *super,
3404                           __u8 map_state)
3405 {
3406         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3407         struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state == 0 ?
3408                                                     MAP_0 : MAP_1);
3409         int i, j;
3410
3411         /* merge any IMSM_ORD_REBUILD bits that were not successfully
3412          * completed in the last migration.
3413          *
3414          * FIXME add support for raid-level-migration
3415          */
3416         if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) &&
3417                 (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) {
3418                 /* when final map state is other than expected
3419                  * merge maps (not for migration)
3420                  */
3421                 int failed;
3422
3423                 for (i = 0; i < prev->num_members; i++)
3424                         for (j = 0; j < map->num_members; j++)
3425                                 /* during online capacity expansion
3426                                  * disks position can be changed
3427                                  * if takeover is used
3428                                  */
3429                                 if (ord_to_idx(map->disk_ord_tbl[j]) ==
3430                                     ord_to_idx(prev->disk_ord_tbl[i])) {
3431                                         map->disk_ord_tbl[j] |=
3432                                                 prev->disk_ord_tbl[i];
3433                                         break;
3434                                 }
3435                 failed = imsm_count_failed(super, dev, MAP_0);
3436                 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
3437         }
3438
3439         dev->vol.migr_state = 0;
3440         set_migr_type(dev, 0);
3441         dev->vol.curr_migr_unit = 0;
3442         map->map_state = map_state;
3443 }
3444 #endif
3445
3446 static int parse_raid_devices(struct intel_super *super)
3447 {
3448         int i;
3449         struct imsm_dev *dev_new;
3450         size_t len, len_migr;
3451         size_t max_len = 0;
3452         size_t space_needed = 0;
3453         struct imsm_super *mpb = super->anchor;
3454
3455         for (i = 0; i < super->anchor->num_raid_devs; i++) {
3456                 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
3457                 struct intel_dev *dv;
3458
3459                 len = sizeof_imsm_dev(dev_iter, 0);
3460                 len_migr = sizeof_imsm_dev(dev_iter, 1);
3461                 if (len_migr > len)
3462                         space_needed += len_migr - len;
3463
3464                 dv = xmalloc(sizeof(*dv));
3465                 if (max_len < len_migr)
3466                         max_len = len_migr;
3467                 if (max_len > len_migr)
3468                         space_needed += max_len - len_migr;
3469                 dev_new = xmalloc(max_len);
3470                 imsm_copy_dev(dev_new, dev_iter);
3471                 dv->dev = dev_new;
3472                 dv->index = i;
3473                 dv->next = super->devlist;
3474                 super->devlist = dv;
3475         }
3476
3477         /* ensure that super->buf is large enough when all raid devices
3478          * are migrating
3479          */
3480         if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
3481                 void *buf;
3482
3483                 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
3484                 if (posix_memalign(&buf, 512, len) != 0)
3485                         return 1;
3486
3487                 memcpy(buf, super->buf, super->len);
3488                 memset(buf + super->len, 0, len - super->len);
3489                 free(super->buf);
3490                 super->buf = buf;
3491                 super->len = len;
3492         }
3493
3494         return 0;
3495 }
3496
3497 /* retrieve a pointer to the bbm log which starts after all raid devices */
3498 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
3499 {
3500         void *ptr = NULL;
3501
3502         if (__le32_to_cpu(mpb->bbm_log_size)) {
3503                 ptr = mpb;
3504                 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
3505         }
3506
3507         return ptr;
3508 }
3509
3510 /*******************************************************************************
3511  * Function:    check_mpb_migr_compatibility
3512  * Description: Function checks for unsupported migration features:
3513  *              - migration optimization area (pba_of_lba0)
3514  *              - descending reshape (ascending_migr)
3515  * Parameters:
3516  *      super   : imsm metadata information
3517  * Returns:
3518  *       0 : migration is compatible
3519  *      -1 : migration is not compatible
3520  ******************************************************************************/
3521 int check_mpb_migr_compatibility(struct intel_super *super)
3522 {
3523         struct imsm_map *map0, *map1;
3524         struct migr_record *migr_rec = super->migr_rec;
3525         int i;
3526
3527         for (i = 0; i < super->anchor->num_raid_devs; i++) {
3528                 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
3529
3530                 if (dev_iter &&
3531                     dev_iter->vol.migr_state == 1 &&
3532                     dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
3533                         /* This device is migrating */
3534                         map0 = get_imsm_map(dev_iter, MAP_0);
3535                         map1 = get_imsm_map(dev_iter, MAP_1);
3536                         if (pba_of_lba0(map0) != pba_of_lba0(map1))
3537                                 /* migration optimization area was used */
3538                                 return -1;
3539                         if (migr_rec->ascending_migr == 0
3540                                 && migr_rec->dest_depth_per_unit > 0)
3541                                 /* descending reshape not supported yet */
3542                                 return -1;
3543                 }
3544         }
3545         return 0;
3546 }
3547
3548 static void __free_imsm(struct intel_super *super, int free_disks);
3549
3550 /* load_imsm_mpb - read matrix metadata
3551  * allocates super->mpb to be freed by free_imsm
3552  */
3553 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
3554 {
3555         unsigned long long dsize;
3556         unsigned long long sectors;
3557         struct stat;
3558         struct imsm_super *anchor;
3559         __u32 check_sum;
3560
3561         get_dev_size(fd, NULL, &dsize);
3562         if (dsize < 1024) {
3563                 if (devname)
3564                         pr_err("%s: device to small for imsm\n",
3565                                devname);
3566                 return 1;
3567         }
3568
3569         if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
3570                 if (devname)
3571                         pr_err("Cannot seek to anchor block on %s: %s\n",
3572                                devname, strerror(errno));
3573                 return 1;
3574         }
3575
3576         if (posix_memalign((void**)&anchor, 512, 512) != 0) {
3577                 if (devname)
3578                         pr_err("Failed to allocate imsm anchor buffer"
3579                                " on %s\n", devname);
3580                 return 1;
3581         }
3582         if (read(fd, anchor, 512) != 512) {
3583                 if (devname)
3584                         pr_err("Cannot read anchor block on %s: %s\n",
3585                                devname, strerror(errno));
3586                 free(anchor);
3587                 return 1;
3588         }
3589
3590         if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
3591                 if (devname)
3592                         pr_err("no IMSM anchor on %s\n", devname);
3593                 free(anchor);
3594                 return 2;
3595         }
3596
3597         __free_imsm(super, 0);
3598         /*  reload capability and hba */
3599
3600         /* capability and hba must be updated with new super allocation */
3601         find_intel_hba_capability(fd, super, devname);
3602         super->len = ROUND_UP(anchor->mpb_size, 512);
3603         if (posix_memalign(&super->buf, 512, super->len) != 0) {
3604                 if (devname)
3605                         pr_err("unable to allocate %zu byte mpb buffer\n",
3606                                super->len);
3607                 free(anchor);
3608                 return 2;
3609         }
3610         memcpy(super->buf, anchor, 512);
3611
3612         sectors = mpb_sectors(anchor) - 1;
3613         free(anchor);
3614
3615         if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
3616                 pr_err("%s could not allocate migr_rec buffer\n", __func__);
3617                 free(super->buf);
3618                 return 2;
3619         }
3620         super->clean_migration_record_by_mdmon = 0;
3621
3622         if (!sectors) {
3623                 check_sum = __gen_imsm_checksum(super->anchor);
3624                 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3625                         if (devname)
3626                                 pr_err("IMSM checksum %x != %x on %s\n",
3627                                        check_sum,
3628                                        __le32_to_cpu(super->anchor->check_sum),
3629                                        devname);
3630                         return 2;
3631                 }
3632
3633                 return 0;
3634         }
3635
3636         /* read the extended mpb */
3637         if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
3638                 if (devname)
3639                         pr_err("Cannot seek to extended mpb on %s: %s\n",
3640                                devname, strerror(errno));
3641                 return 1;
3642         }
3643
3644         if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
3645                 if (devname)
3646                         pr_err("Cannot read extended mpb on %s: %s\n",
3647                                devname, strerror(errno));
3648                 return 2;
3649         }
3650
3651         check_sum = __gen_imsm_checksum(super->anchor);
3652         if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3653                 if (devname)
3654                         pr_err("IMSM checksum %x != %x on %s\n",
3655                                check_sum, __le32_to_cpu(super->anchor->check_sum),
3656                                devname);
3657                 return 3;
3658         }
3659
3660         /* FIXME the BBM log is disk specific so we cannot use this global
3661          * buffer for all disks.  Ok for now since we only look at the global
3662          * bbm_log_size parameter to gate assembly
3663          */
3664         super->bbm_log = __get_imsm_bbm_log(super->anchor);
3665
3666         return 0;
3667 }
3668
3669 static int read_imsm_migr_rec(int fd, struct intel_super *super);
3670
3671 /* clears hi bits in metadata if MPB_ATTRIB_2TB_DISK not set */
3672 static void clear_hi(struct intel_super *super)
3673 {
3674         struct imsm_super *mpb = super->anchor;
3675         int i, n;
3676         if (mpb->attributes & MPB_ATTRIB_2TB_DISK)
3677                 return;
3678         for (i = 0; i < mpb->num_disks; ++i) {
3679                 struct imsm_disk *disk = &mpb->disk[i];
3680                 disk->total_blocks_hi = 0;
3681         }
3682         for (i = 0; i < mpb->num_raid_devs; ++i) {
3683                 struct imsm_dev *dev = get_imsm_dev(super, i);
3684                 if (!dev)
3685                         return;
3686                 for (n = 0; n < 2; ++n) {
3687                         struct imsm_map *map = get_imsm_map(dev, n);
3688                         if (!map)
3689                                 continue;
3690                         map->pba_of_lba0_hi = 0;
3691                         map->blocks_per_member_hi = 0;
3692                         map->num_data_stripes_hi = 0;
3693                 }
3694         }
3695 }
3696
/* load_and_parse_mpb - load the metadata from 'fd' into 'super' in three
 * steps: read the mpb, record the disk, then parse the raid devices.
 *
 * The first failing step's error code is returned and later steps are
 * skipped.  clear_hi() runs after the raid devices were parsed, whatever
 * that step returned.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err) {
		err = parse_raid_devices(super);
		clear_hi(super);
	}

	return err;
}
3712
3713 static void __free_imsm_disk(struct dl *d)
3714 {
3715         if (d->fd >= 0)
3716                 close(d->fd);
3717         if (d->devname)
3718                 free(d->devname);
3719         if (d->e)
3720                 free(d->e);
3721         free(d);
3722
3723 }
3724
3725 static void free_imsm_disks(struct intel_super *super)
3726 {
3727         struct dl *d;
3728
3729         while (super->disks) {
3730                 d = super->disks;
3731                 super->disks = d->next;
3732                 __free_imsm_disk(d);
3733         }
3734         while (super->disk_mgmt_list) {
3735                 d = super->disk_mgmt_list;
3736                 super->disk_mgmt_list = d->next;
3737                 __free_imsm_disk(d);
3738         }
3739         while (super->missing) {
3740                 d = super->missing;
3741                 super->missing = d->next;
3742                 __free_imsm_disk(d);
3743         }
3744
3745 }
3746
3747 /* free all the pieces hanging off of a super pointer */
3748 static void __free_imsm(struct intel_super *super, int free_disks)
3749 {
3750         struct intel_hba *elem, *next;
3751
3752         if (super->buf) {
3753                 free(super->buf);
3754                 super->buf = NULL;
3755         }
3756         /* unlink capability description */
3757         super->orom = NULL;
3758         if (super->migr_rec_buf) {
3759                 free(super->migr_rec_buf);
3760                 super->migr_rec_buf = NULL;
3761         }
3762         if (free_disks)
3763                 free_imsm_disks(super);
3764         free_devlist(super);
3765         elem = super->hba;
3766         while (elem) {
3767                 if (elem->path)
3768                         free((void *)elem->path);
3769                 next = elem->next;
3770                 free(elem);
3771                 elem = next;
3772         }
3773         super->hba = NULL;
3774 }
3775
/* free_imsm - release 'super' completely: everything hanging off it,
 * including the disk lists, and then the structure itself.
 */
static void free_imsm(struct intel_super *super)
{
	const int with_disks = 1;

	__free_imsm(super, with_disks);
	free(super);
}
3781
3782 static void free_super_imsm(struct supertype *st)
3783 {
3784         struct intel_super *super = st->sb;
3785
3786         if (!super)
3787                 return;
3788
3789         free_imsm(super);
3790         st->sb = NULL;
3791 }
3792
3793 static struct intel_super *alloc_super(void)
3794 {
3795         struct intel_super *super = xcalloc(1, sizeof(*super));
3796
3797         super->current_vol = -1;
3798         super->create_offset = ~((unsigned long long) 0);
3799         return super;
3800 }
3801
3802 /*
3803  * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3804  */
3805 static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
3806 {
3807         struct sys_dev *hba_name;
3808         int rv = 0;
3809
3810         if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
3811                 super->orom = NULL;
3812                 super->hba = NULL;
3813                 return 0;
3814         }
3815         hba_name = find_disk_attached_hba(fd, NULL);
3816         if (!hba_name) {
3817                 if (devname)
3818                         pr_err("%s is not attached to Intel(R) RAID controller.\n",
3819                                devname);
3820                 return 1;
3821         }
3822         rv = attach_hba_to_super(super, hba_name);
3823         if (rv == 2) {
3824                 if (devname) {
3825                         struct intel_hba *hba = super->hba;
3826
3827                         pr_err("%s is attached to Intel(R) %s RAID "
3828                                 "controller (%s),\n"
3829                                 "    but the container is assigned to Intel(R) "
3830                                 "%s RAID controller (",
3831                                 devname,
3832                                 hba_name->path,
3833                                 hba_name->pci_id ? : "Err!",
3834                                 get_sys_dev_type(hba_name->type));
3835
3836                         while (hba) {
3837                                 fprintf(stderr, "%s", hba->pci_id ? : "Err!");
3838                                 if (hba->next)
3839                                         fprintf(stderr, ", ");
3840                                 hba = hba->next;
3841                         }
3842
3843                         fprintf(stderr, ").\n");
3844                         cont_err("Mixing devices attached to multiple controllers "
3845                                  "is not allowed.\n");
3846                 }
3847                 return 2;
3848         }
3849         super->orom = find_imsm_capability(hba_name->type);
3850         if (!super->orom)
3851                 return 3;
3852         return 0;
3853 }
3854
3855 /* find_missing - helper routine for load_super_imsm_all that identifies
3856  * disks that have disappeared from the system.  This routine relies on
3857  * the mpb being uptodate, which it is at load time.
3858  */
3859 static int find_missing(struct intel_super *super)
3860 {
3861         int i;
3862         struct imsm_super *mpb = super->anchor;
3863         struct dl *dl;
3864         struct imsm_disk *disk;
3865
3866         for (i = 0; i < mpb->num_disks; i++) {
3867                 disk = __get_imsm_disk(mpb, i);
3868                 dl = serial_to_dl(disk->serial, super);
3869                 if (dl)
3870                         continue;
3871
3872                 dl = xmalloc(sizeof(*dl));
3873                 dl->major = 0;
3874                 dl->minor = 0;
3875                 dl->fd = -1;
3876                 dl->devname = xstrdup("missing");
3877                 dl->index = i;
3878                 serialcpy(dl->serial, disk->serial);
3879                 dl->disk = *disk;
3880                 dl->e = NULL;
3881                 dl->next = super->missing;
3882                 super->missing = dl;
3883         }
3884
3885         return 0;
3886 }
3887
3888 #ifndef MDASSEMBLE
3889 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3890 {
3891         struct intel_disk *idisk = disk_list;
3892
3893         while (idisk) {
3894                 if (serialcmp(idisk->disk.serial, serial) == 0)
3895                         break;
3896                 idisk = idisk->next;
3897         }
3898
3899         return idisk;
3900 }
3901
3902 static int __prep_thunderdome(struct intel_super **table, int tbl_size,
3903                               struct intel_super *super,
3904                               struct intel_disk **disk_list)
3905 {
3906         struct imsm_disk *d = &super->disks->disk;
3907         struct imsm_super *mpb = super->anchor;
3908         int i, j;
3909
3910         for (i = 0; i < tbl_size; i++) {
3911                 struct imsm_super *tbl_mpb = table[i]->anchor;
3912                 struct imsm_disk *tbl_d = &table[i]->disks->disk;
3913
3914                 if (tbl_mpb->family_num == mpb->family_num) {
3915                         if (tbl_mpb->check_sum == mpb->check_sum) {
3916                                 dprintf("%s: mpb from %d:%d matches %d:%d\n",
3917                                         __func__, super->disks->major,
3918                                         super->disks->minor,
3919                                         table[i]->disks->major,
3920                                         table[i]->disks->minor);
3921                                 break;
3922                         }
3923
3924                         if (((is_configured(d) && !is_configured(tbl_d)) ||
3925                              is_configured(d) == is_configured(tbl_d)) &&
3926                             tbl_mpb->generation_num < mpb->generation_num) {
3927                                 /* current version of the mpb is a
3928                                  * better candidate than the one in
3929                                  * super_table, but copy over "cross
3930                                  * generational" status
3931                                  */
3932                                 struct intel_disk *idisk;
3933
3934                                 dprintf("%s: mpb from %d:%d replaces %d:%d\n",
3935                                         __func__, super->disks->major,
3936                                         super->disks->minor,
3937                                         table[i]->disks->major,
3938                                         table[i]->disks->minor);
3939
3940                                 idisk = disk_list_get(tbl_d->serial, *disk_list);
3941                                 if (idisk && is_failed(&idisk->disk))
3942                                         tbl_d->status |= FAILED_DISK;
3943                                 break;
3944                         } else {
3945                                 struct intel_disk *idisk;
3946                                 struct imsm_disk *disk;
3947
3948                                 /* tbl_mpb is more up to date, but copy
3949                                  * over cross generational status before
3950                                  * returning
3951                                  */
3952                                 disk = __serial_to_disk(d->serial, mpb, NULL);
3953                                 if (disk && is_failed(disk))
3954                                         d->status |= FAILED_DISK;
3955
3956                                 idisk = disk_list_get(d->serial, *disk_list);
3957                                 if (idisk) {
3958                                         idisk->owner = i;
3959                                         if (disk && is_configured(disk))
3960                                                 idisk->disk.status |= CONFIGURED_DISK;
3961                                 }
3962
3963                                 dprintf("%s: mpb from %d:%d prefer %d:%d\n",
3964                                         __func__, super->disks->major,
3965                                         super->disks->minor,
3966                                         table[i]->disks->major,
3967                                         table[i]->disks->minor);
3968
3969                                 return tbl_size;
3970                         }
3971                 }
3972         }
3973
3974         if (i >= tbl_size)
3975                 table[tbl_size++] = super;
3976         else
3977                 table[i] = super;
3978
3979         /* update/extend the merged list of imsm_disk records */
3980         for (j = 0; j < mpb->num_disks; j++) {
3981                 struct imsm_disk *disk = __get_imsm_disk(mpb, j);
3982                 struct intel_disk *idisk;
3983
3984                 idisk = disk_list_get(disk->serial, *disk_list);
3985                 if (idisk) {
3986                         idisk->disk.status |= disk->status;
3987                         if (is_configured(&idisk->disk) ||
3988                             is_failed(&idisk->disk))
3989                                 idisk->disk.status &= ~(SPARE_DISK);
3990                 } else {
3991                         idisk = xcalloc(1, sizeof(*idisk));
3992                         idisk->owner = IMSM_UNKNOWN_OWNER;
3993                         idisk->disk = *disk;
3994                         idisk->next = *disk_list;
3995                         *disk_list = idisk;
3996                 }
3997
3998                 if (serialcmp(idisk->disk.serial, d->serial) == 0)
3999                         idisk->owner = i;
4000         }
4001
4002         return tbl_size;
4003 }
4004
4005 static struct intel_super *
4006 validate_members(struct intel_super *super, struct intel_disk *disk_list,
4007                  const int owner)
4008 {
4009         struct imsm_super *mpb = super->anchor;
4010         int ok_count = 0;
4011         int i;
4012
4013         for (i = 0; i < mpb->num_disks; i++) {
4014                 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
4015                 struct intel_disk *idisk;
4016
4017                 idisk = disk_list_get(disk->serial, disk_list);
4018                 if (idisk) {
4019                         if (idisk->owner == owner ||
4020                             idisk->owner == IMSM_UNKNOWN_OWNER)
4021                                 ok_count++;
4022                         else
4023                                 dprintf("%s: '%.16s' owner %d != %d\n",
4024                                         __func__, disk->serial, idisk->owner,
4025                                         owner);
4026                 } else {
4027                         dprintf("%s: unknown disk %x [%d]: %.16s\n",
4028                                 __func__, __le32_to_cpu(mpb->family_num), i,
4029                                 disk->serial);
4030                         break;
4031                 }
4032         }
4033
4034         if (ok_count == mpb->num_disks)
4035                 return super;
4036         return NULL;
4037 }
4038
4039 static void show_conflicts(__u32 family_num, struct intel_super *super_list)
4040 {
4041         struct intel_super *s;
4042
4043         for (s = super_list; s; s = s->next) {
4044                 if (family_num != s->anchor->family_num)
4045                         continue;
4046                 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
4047                         __le32_to_cpu(family_num), s->disks->devname);
4048         }
4049 }
4050
4051 static struct intel_super *
4052 imsm_thunderdome(struct intel_super **super_list, int len)
4053 {
4054         struct intel_super *super_table[len];
4055         struct intel_disk *disk_list = NULL;
4056         struct intel_super *champion, *spare;
4057         struct intel_super *s, **del;
4058         int tbl_size = 0;
4059         int conflict;
4060         int i;
4061
4062         memset(super_table, 0, sizeof(super_table));
4063         for (s = *super_list; s; s = s->next)
4064                 tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
4065
4066         for (i = 0; i < tbl_size; i++) {
4067                 struct imsm_disk *d;
4068                 struct intel_disk *idisk;
4069                 struct imsm_super *mpb = super_table[i]->anchor;
4070
4071                 s = super_table[i];
4072                 d = &s->disks->disk;
4073
4074                 /* 'd' must appear in merged disk list for its
4075                  * configuration to be valid
4076                  */
4077                 idisk = disk_list_get(d->serial, disk_list);
4078                 if (idisk && idisk->owner == i)
4079                         s = validate_members(s, disk_list, i);
4080                 else
4081                         s = NULL;
4082
4083                 if (!s)
4084                         dprintf("%s: marking family: %#x from %d:%d offline\n",
4085                                 __func__, mpb->family_num,
4086                                 super_table[i]->disks->major,
4087                                 super_table[i]->disks->minor);
4088                 super_table[i] = s;
4089         }
4090
4091         /* This is where the mdadm implementation differs from the Windows
4092          * driver which has no strict concept of a container.  We can only
4093          * assemble one family from a container, so when returning a prodigal
4094          * array member to this system the code will not be able to disambiguate
4095          * the container contents that should be assembled ("foreign" versus
4096          * "local").  It requires user intervention to set the orig_family_num
4097          * to a new value to establish a new container.  The Windows driver in
4098          * this situation fixes up the volume name in place and manages the
4099          * foreign array as an independent entity.
4100          */
4101         s = NULL;
4102         spare = NULL;
4103         conflict = 0;
4104         for (i = 0; i < tbl_size; i++) {
4105                 struct intel_super *tbl_ent = super_table[i];
4106                 int is_spare = 0;
4107
4108                 if (!tbl_ent)
4109                         continue;
4110
4111                 if (tbl_ent->anchor->num_raid_devs == 0) {
4112                         spare = tbl_ent;
4113                         is_spare = 1;
4114                 }
4115
4116                 if (s && !is_spare) {
4117                         show_conflicts(tbl_ent->anchor->family_num, *super_list);
4118                         conflict++;
4119                 } else if (!s && !is_spare)
4120                         s = tbl_ent;
4121         }
4122
4123         if (!s)
4124                 s = spare;
4125         if (!s) {
4126                 champion = NULL;
4127                 goto out;
4128         }
4129         champion = s;
4130
4131         if (conflict)
4132                 fprintf(stderr, "Chose family %#x on '%s', "
4133                         "assemble conflicts to new container with '--update=uuid'\n",
4134                         __le32_to_cpu(s->anchor->family_num), s->disks->devname);
4135
4136         /* collect all dl's onto 'champion', and update them to
4137          * champion's version of the status
4138          */
4139         for (s = *super_list; s; s = s->next) {
4140                 struct imsm_super *mpb = champion->anchor;
4141                 struct dl *dl = s->disks;
4142
4143                 if (s == champion)
4144                         continue;
4145
4146                 mpb->attributes |= s->anchor->attributes & MPB_ATTRIB_2TB_DISK;
4147
4148                 for (i = 0; i < mpb->num_disks; i++) {
4149                         struct imsm_disk *disk;
4150
4151                         disk = __serial_to_disk(dl->serial, mpb, &dl->index);
4152                         if (disk) {
4153                                 dl->disk = *disk;
4154                                 /* only set index on disks that are a member of
4155                                  * a populated contianer, i.e. one with
4156                                  * raid_devs
4157                                  */
4158                                 if (is_failed(&dl->disk))
4159                                         dl->index = -2;
4160                                 else if (is_spare(&dl->disk))
4161                                         dl->index = -1;
4162                                 break;
4163                         }
4164                 }
4165
4166                 if (i >= mpb->num_disks) {
4167                         struct intel_disk *idisk;
4168
4169                         idisk = disk_list_get(dl->serial, disk_list);
4170                         if (idisk && is_spare(&idisk->disk) &&
4171                             !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
4172                                 dl->index = -1;
4173                         else {
4174                                 dl->index = -2;
4175                                 continue;
4176                         }
4177                 }
4178
4179                 dl->next = champion->disks;
4180                 champion->disks = dl;
4181                 s->disks = NULL;
4182         }
4183
4184         /* delete 'champion' from super_list */
4185         for (del = super_list; *del; ) {
4186                 if (*del == champion) {
4187                         *del = (*del)->next;
4188                         break;
4189                 } else
4190                         del = &(*del)->next;
4191         }
4192         champion->next = NULL;
4193
4194  out:
4195         while (disk_list) {
4196                 struct intel_disk *idisk = disk_list;
4197
4198                 disk_list = disk_list->next;
4199                 free(idisk);
4200         }
4201
4202         return champion;
4203 }
4204
4205
/* Forward declarations: these helpers are defined after
 * load_super_imsm_all(), which calls get_sra_super_block() and
 * get_devlist_super_block(); both of those in turn call get_super_block().
 */
static int
get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd);
static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
                           int major, int minor, int keep_fd);
static int
get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
                        int *max, int keep_fd);
4213
4214
4215 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
4216                                char *devname, struct md_list *devlist,
4217                                int keep_fd)
4218 {
4219         struct intel_super *super_list = NULL;
4220         struct intel_super *super = NULL;
4221         int err = 0;
4222         int i = 0;
4223
4224         if (fd >= 0)
4225                 /* 'fd' is an opened container */
4226                 err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd);
4227         else
4228                 /* get super block from devlist devices */
4229                 err = get_devlist_super_block(devlist, &super_list, &i, keep_fd);
4230         if (err)
4231                 goto error;
4232         /* all mpbs enter, maybe one leaves */
4233         super = imsm_thunderdome(&super_list, i);
4234         if (!super) {
4235                 err = 1;
4236                 goto error;
4237         }
4238
4239         if (find_missing(super) != 0) {
4240                 free_imsm(super);
4241                 err = 2;
4242                 goto error;
4243         }
4244
4245         /* load migration record */
4246         err = load_imsm_migr_rec(super, NULL);
4247         if (err == -1) {
4248                 /* migration is in progress,
4249                  * but migr_rec cannot be loaded,
4250                  */
4251                 err = 4;
4252                 goto error;
4253         }
4254
4255         /* Check migration compatibility */
4256         if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) {
4257                 pr_err("Unsupported migration detected");
4258                 if (devname)
4259                         fprintf(stderr, " on %s\n", devname);
4260                 else
4261                         fprintf(stderr, " (IMSM).\n");
4262
4263                 err = 5;
4264                 goto error;
4265         }
4266
4267         err = 0;
4268
4269  error:
4270         while (super_list) {
4271                 struct intel_super *s = super_list;
4272
4273                 super_list = super_list->next;
4274                 free_imsm(s);
4275         }
4276
4277
4278         if (err)
4279                 return err;
4280
4281         *sbp = super;
4282         if (fd >= 0)
4283                 strcpy(st->container_devnm, fd2devnm(fd));
4284         else
4285                 st->container_devnm[0] = 0;
4286         if (err == 0 && st->ss == NULL) {
4287                 st->ss = &super_imsm;
4288                 st->minor_version = 0;
4289                 st->max_devs = IMSM_MAX_DEVICES;
4290         }
4291         return 0;
4292 }
4293
4294
4295 static int
4296 get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
4297                         int *max, int keep_fd)
4298 {
4299         struct md_list *tmpdev;
4300         int err = 0;
4301         int i = 0;
4302
4303         for (i = 0, tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
4304                 if (tmpdev->used != 1)
4305                         continue;
4306                 if (tmpdev->container == 1) {
4307                         int lmax = 0;
4308                         int fd = dev_open(tmpdev->devname, O_RDONLY|O_EXCL);
4309                         if (fd < 0) {
4310                                 pr_err("cannot open device %s: %s\n",
4311                                         tmpdev->devname, strerror(errno));
4312                                 err = 8;
4313                                 goto error;
4314                         }
4315                         err = get_sra_super_block(fd, super_list,
4316                                                   tmpdev->devname, &lmax,
4317                                                   keep_fd);
4318                         i += lmax;
4319                         close(fd);
4320                         if (err) {
4321                                 err = 7;
4322                                 goto error;
4323                         }
4324                 } else {
4325                         int major = major(tmpdev->st_rdev);
4326                         int minor = minor(tmpdev->st_rdev);
4327                         err = get_super_block(super_list,
4328                                               NULL,
4329                                               tmpdev->devname,
4330                                               major, minor,
4331                                               keep_fd);
4332                         i++;
4333                         if (err) {
4334                                 err = 6;
4335                                 goto error;
4336                         }
4337                 }
4338         }
4339  error:
4340         *max = i;
4341         return err;
4342 }
4343
4344 static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
4345                            int major, int minor, int keep_fd)
4346 {
4347         struct intel_super*s = NULL;
4348         char nm[32];
4349         int dfd = -1;
4350         int err = 0;
4351         int retry;
4352
4353         s = alloc_super();
4354         if (!s) {
4355                 err = 1;
4356                 goto error;
4357         }
4358
4359         sprintf(nm, "%d:%d", major, minor);
4360         dfd = dev_open(nm, O_RDWR);
4361         if (dfd < 0) {
4362                 err = 2;
4363                 goto error;
4364         }
4365
4366         find_intel_hba_capability(dfd, s, devname);
4367         err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
4368
4369         /* retry the load if we might have raced against mdmon */
4370         if (err == 3 && devnm && mdmon_running(devnm))
4371                 for (retry = 0; retry < 3; retry++) {
4372                         usleep(3000);
4373                         err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
4374                         if (err != 3)
4375                                 break;
4376                 }
4377  error:
4378         if (!err) {
4379                 s->next = *super_list;
4380                 *super_list = s;
4381         } else {
4382                 if (s)
4383                         free(s);
4384                 if (dfd >= 0)
4385                         close(dfd);
4386         }
4387         if ((dfd >= 0) && (!keep_fd))
4388                 close(dfd);
4389         return err;
4390
4391 }
4392
4393 static int
4394 get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd)
4395 {
4396         struct mdinfo *sra;
4397         char *devnm;
4398         struct mdinfo *sd;
4399         int err = 0;
4400         int i = 0;
4401         sra = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
4402         if (!sra)
4403                 return 1;
4404
4405         if (sra->array.major_version != -1 ||
4406             sra->array.minor_version != -2 ||
4407             strcmp(sra->text_version, "imsm") != 0) {
4408                 err = 1;
4409                 goto error;
4410         }
4411         /* load all mpbs */
4412         devnm = fd2devnm(fd);
4413         for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
4414                 if (get_super_block(super_list, devnm, devname,
4415                                     sd->disk.major, sd->disk.minor, keep_fd) != 0) {
4416                         err = 7;
4417                         goto error;
4418                 }
4419         }
4420  error:
4421         sysfs_free(sra);
4422         *max = i;
4423         return err;
4424 }
4425
4426 static int load_container_imsm(struct supertype *st, int fd, char *devname)
4427 {
4428         return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
4429 }
4430 #endif
4431
4432 static int load_super_imsm(struct supertype *st, int fd, char *devname)
4433 {
4434         struct intel_super *super;
4435         int rv;
4436
4437         if (!st->ignore_hw_compat && test_partition(fd))
4438                 /* IMSM not allowed on partitions */
4439                 return 1;
4440
4441         free_super_imsm(st);
4442
4443         super = alloc_super();
4444         /* Load hba and capabilities if they exist.
4445          * But do not preclude loading metadata in case capabilities or hba are
4446          * non-compliant and ignore_hw_compat is set.
4447          */
4448         rv = find_intel_hba_capability(fd, super, devname);
4449         /* no orom/efi or non-intel hba of the disk */
4450         if ((rv != 0) && (st->ignore_hw_compat == 0)) {
4451                 if (devname)
4452                         pr_err("No OROM/EFI properties for %s\n", devname);
4453                 free_imsm(super);
4454                 return 2;
4455         }
4456         rv = load_and_parse_mpb(fd, super, devname, 0);
4457
4458         if (rv) {
4459                 if (devname)
4460                         pr_err("Failed to load all information "
4461                                "sections on %s\n", devname);
4462                 free_imsm(super);
4463                 return rv;
4464         }
4465
4466         st->sb = super;
4467         if (st->ss == NULL) {
4468                 st->ss = &super_imsm;
4469                 st->minor_version = 0;
4470                 st->max_devs = IMSM_MAX_DEVICES;
4471         }
4472
4473         /* load migration record */
4474         if (load_imsm_migr_rec(super, NULL) == 0) {
4475                 /* Check for unsupported migration features */
4476                 if (check_mpb_migr_compatibility(super) != 0) {
4477                         pr_err("Unsupported migration detected");
4478                         if (devname)
4479                                 fprintf(stderr, " on %s\n", devname);
4480                         else
4481                                 fprintf(stderr, " (IMSM).\n");
4482                         return 3;
4483                 }
4484         }
4485
4486         return 0;
4487 }
4488
4489 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
4490 {
4491         if (info->level == 1)
4492                 return 128;
4493         return info->chunk_size >> 9;
4494 }
4495
4496 static unsigned long long info_to_blocks_per_member(mdu_array_info_t *info,
4497                                                     unsigned long long size)
4498 {
4499         if (info->level == 1)
4500                 return size * 2;
4501         else
4502                 return (size * 2) & ~(info_to_blocks_per_strip(info) - 1);
4503 }
4504
4505 static void imsm_update_version_info(struct intel_super *super)
4506 {
4507         /* update the version and attributes */
4508         struct imsm_super *mpb = super->anchor;
4509         char *version;
4510         struct imsm_dev *dev;
4511         struct imsm_map *map;
4512         int i;
4513
4514         for (i = 0; i < mpb->num_raid_devs; i++) {
4515                 dev = get_imsm_dev(super, i);
4516                 map = get_imsm_map(dev, MAP_0);
4517                 if (__le32_to_cpu(dev->size_high) > 0)
4518                         mpb->attributes |= MPB_ATTRIB_2TB;
4519
4520                 /* FIXME detect when an array spans a port multiplier */
4521                 #if 0
4522                 mpb->attributes |= MPB_ATTRIB_PM;
4523                 #endif
4524
4525                 if (mpb->num_raid_devs > 1 ||
4526                     mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
4527                         version = MPB_VERSION_ATTRIBS;
4528                         switch (get_imsm_raid_level(map)) {
4529                         case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
4530                         case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
4531                         case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
4532                         case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
4533                         }
4534                 } else {
4535                         if (map->num_members >= 5)
4536                                 version = MPB_VERSION_5OR6_DISK_ARRAY;
4537                         else if (dev->status == DEV_CLONE_N_GO)
4538                                 version = MPB_VERSION_CNG;
4539                         else if (get_imsm_raid_level(map) == 5)
4540                                 version = MPB_VERSION_RAID5;
4541                         else if (map->num_members >= 3)
4542                                 version = MPB_VERSION_3OR4_DISK_ARRAY;
4543                         else if (get_imsm_raid_level(map) == 1)
4544                                 version = MPB_VERSION_RAID1;
4545                         else
4546                                 version = MPB_VERSION_RAID0;
4547                 }
4548                 strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
4549         }
4550 }
4551
4552 static int check_name(struct intel_super *super, char *name, int quiet)
4553 {
4554         struct imsm_super *mpb = super->anchor;
4555         char *reason = NULL;
4556         int i;
4557
4558         if (strlen(name) > MAX_RAID_SERIAL_LEN)
4559                 reason = "must be 16 characters or less";
4560
4561         for (i = 0; i < mpb->num_raid_devs; i++) {
4562                 struct imsm_dev *dev = get_imsm_dev(super, i);
4563
4564                 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
4565                         reason = "already exists";
4566                         break;
4567                 }
4568         }
4569
4570         if (reason && !quiet)
4571                 pr_err("imsm volume name %s\n", reason);
4572
4573         return !reason;
4574 }
4575
4576 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
4577                                   unsigned long long size, char *name,
4578                                   char *homehost, int *uuid,
4579                                   long long data_offset)
4580 {
4581         /* We are creating a volume inside a pre-existing container.
4582          * so st->sb is already set.
4583          */
4584         struct intel_super *super = st->sb;
4585         struct imsm_super *mpb = super->anchor;
4586         struct intel_dev *dv;
4587         struct imsm_dev *dev;
4588         struct imsm_vol *vol;
4589         struct imsm_map *map;
4590         int idx = mpb->num_raid_devs;
4591         int i;
4592         unsigned long long array_blocks;
4593         size_t size_old, size_new;
4594         unsigned long long num_data_stripes;
4595
4596         if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
4597                 pr_err("This imsm-container already has the "
4598                         "maximum of %d volumes\n", super->orom->vpa);
4599                 return 0;
4600         }
4601
4602         /* ensure the mpb is large enough for the new data */
4603         size_old = __le32_to_cpu(mpb->mpb_size);
4604         size_new = disks_to_mpb_size(info->nr_disks);
4605         if (size_new > size_old) {
4606                 void *mpb_new;
4607                 size_t size_round = ROUND_UP(size_new, 512);
4608
4609                 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
4610                         pr_err("could not allocate new mpb\n");
4611                         return 0;
4612                 }
4613                 if (posix_memalign(&super->migr_rec_buf, 512,
4614                                    MIGR_REC_BUF_SIZE) != 0) {
4615                         pr_err("%s could not allocate migr_rec buffer\n",
4616                                __func__);
4617                         free(super->buf);
4618                         free(super);
4619                         free(mpb_new);
4620                         return 0;
4621                 }
4622                 memcpy(mpb_new, mpb, size_old);
4623                 free(mpb);
4624                 mpb = mpb_new;
4625                 super->anchor = mpb_new;
4626                 mpb->mpb_size = __cpu_to_le32(size_new);
4627                 memset(mpb_new + size_old, 0, size_round - size_old);
4628         }
4629         super->current_vol = idx;
4630
4631         /* handle 'failed_disks' by either:
4632          * a) create dummy disk entries in the table if this the first
4633          *    volume in the array.  We add them here as this is the only
4634          *    opportunity to add them. add_to_super_imsm_volume()
4635          *    handles the non-failed disks and continues incrementing
4636          *    mpb->num_disks.
4637          * b) validate that 'failed_disks' matches the current number
4638          *    of missing disks if the container is populated
4639          */
4640         if (super->current_vol == 0) {
4641                 mpb->num_disks = 0;
4642                 for (i = 0; i < info->failed_disks; i++) {
4643                         struct imsm_disk *disk;
4644
4645                         mpb->num_disks++;
4646                         disk = __get_imsm_disk(mpb, i);
4647                         disk->status = CONFIGURED_DISK | FAILED_DISK;
4648                         disk->scsi_id = __cpu_to_le32(~(__u32)0);
4649                         snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
4650                                  "missing:%d", i);
4651                 }
4652                 find_missing(super);
4653         } else {
4654                 int missing = 0;
4655                 struct dl *d;
4656
4657                 for (d = super->missing; d; d = d->next)
4658                         missing++;
4659                 if (info->failed_disks > missing) {
4660                         pr_err("unable to add 'missing' disk to container\n");
4661                         return 0;
4662                 }
4663         }
4664
4665         if (!check_name(super, name, 0))
4666                 return 0;
4667         dv = xmalloc(sizeof(*dv));
4668         dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
4669         strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
4670         array_blocks = calc_array_size(info->level, info->raid_disks,
4671                                                info->layout, info->chunk_size,
4672                                                size * 2);
4673         /* round array size down to closest MB */
4674         array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
4675
4676         dev->size_low = __cpu_to_le32((__u32) array_blocks);
4677         dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
4678         dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
4679         vol = &dev->vol;
4680         vol->migr_state = 0;
4681         set_migr_type(dev, MIGR_INIT);
4682         vol->dirty = !info->state;
4683         vol->curr_migr_unit = 0;
4684         map = get_imsm_map(dev, MAP_0);
4685         set_pba_of_lba0(map, super->create_offset);
4686         set_blocks_per_member(map, info_to_blocks_per_member(info, size));
4687         map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
4688         map->failed_disk_num = ~0;
4689         if (info->level > 0)
4690                 map->map_state = (info->state ? IMSM_T_STATE_NORMAL
4691                                   : IMSM_T_STATE_UNINITIALIZED);
4692         else
4693                 map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED :
4694                                                       IMSM_T_STATE_NORMAL;
4695         map->ddf = 1;
4696
4697         if (info->level == 1 && info->raid_disks > 2) {
4698                 free(dev);
4699                 free(dv);
4700                 pr_err("imsm does not support more than 2 disks"
4701                                 "in a raid1 volume\n");
4702                 return 0;
4703         }
4704
4705         map->raid_level = info->level;
4706         if (info->level == 10) {
4707                 map->raid_level = 1;
4708                 map->num_domains = info->raid_disks / 2;
4709         } else if (info->level == 1)
4710                 map->num_domains = info->raid_disks;
4711         else
4712                 map->num_domains = 1;
4713
4714         /* info->size is only int so use the 'size' parameter instead */
4715         num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
4716         num_data_stripes /= map->num_domains;
4717         set_num_data_stripes(map, num_data_stripes);
4718
4719         map->num_members = info->raid_disks;
4720         for (i = 0; i < map->num_members; i++) {
4721                 /* initialized in add_to_super */
4722                 set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
4723         }
4724         mpb->num_raid_devs++;
4725
4726         dv->dev = dev;
4727         dv->index = super->current_vol;
4728         dv->next = super->devlist;
4729         super->devlist = dv;
4730
4731         imsm_update_version_info(super);
4732
4733         return 1;
4734 }
4735
4736 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
4737                            unsigned long long size, char *name,
4738                            char *homehost, int *uuid,
4739                            unsigned long long data_offset)
4740 {
4741         /* This is primarily called by Create when creating a new array.
4742          * We will then get add_to_super called for each component, and then
4743          * write_init_super called to write it out to each device.
4744          * For IMSM, Create can create on fresh devices or on a pre-existing
4745          * array.
4746          * To create on a pre-existing array a different method will be called.
4747          * This one is just for fresh drives.
4748          */
4749         struct intel_super *super;
4750         struct imsm_super *mpb;
4751         size_t mpb_size;
4752         char *version;
4753
4754         if (data_offset != INVALID_SECTORS) {
4755                 pr_err("data-offset not supported by imsm\n");
4756                 return 0;
4757         }
4758
4759         if (st->sb)
4760                 return init_super_imsm_volume(st, info, size, name, homehost, uuid,
4761                                               data_offset);
4762
4763         if (info)
4764                 mpb_size = disks_to_mpb_size(info->nr_disks);
4765         else
4766                 mpb_size = 512;
4767
4768         super = alloc_super();
4769         if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
4770                 free(super);
4771                 super = NULL;
4772         }
4773         if (!super) {
4774                 pr_err("%s could not allocate superblock\n", __func__);
4775                 return 0;
4776         }
4777         if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
4778                 pr_err("%s could not allocate migr_rec buffer\n", __func__);
4779                 free(super->buf);
4780                 free(super);
4781                 return 0;
4782         }
4783         memset(super->buf, 0, mpb_size);
4784         mpb = super->buf;
4785         mpb->mpb_size = __cpu_to_le32(mpb_size);
4786         st->sb = super;
4787
4788         if (info == NULL) {
4789                 /* zeroing superblock */
4790                 return 0;
4791         }
4792
4793         mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4794
4795         version = (char *) mpb->sig;
4796         strcpy(version, MPB_SIGNATURE);
4797         version += strlen(MPB_SIGNATURE);
4798         strcpy(version, MPB_VERSION_RAID0);
4799
4800         return 1;
4801 }
4802
4803 #ifndef MDASSEMBLE
/* add_to_super_imsm_volume()
 *   Bind one container disk to slot dk->raid_disk of the volume currently
 *   being created (super->current_vol).  The disk is looked up in
 *   super->disks either by its pre-marked raiddisk number (fd == -1,
 *   autolayout) or by major/minor.
 *
 * returns:
 *    0 : disk recorded in the volume map, super->current_disk updated
 *    1 : error (spare requested, disk not in the container, disk already
 *        used by this volume, or inconsistent mpb)
 */
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                                     int fd, char *devname)
{
        struct intel_super *super = st->sb;
        struct imsm_super *mpb = super->anchor;
        struct imsm_disk *_disk;
        struct imsm_dev *dev;
        struct imsm_map *map;
        struct dl *dl, *df;
        int slot;

        dev = get_imsm_dev(super, super->current_vol);
        map = get_imsm_map(dev, MAP_0);

        /* only in-sync members are accepted here */
        if (! (dk->state & (1<<MD_DISK_SYNC))) {
                pr_err("%s: Cannot add spare devices to IMSM volume\n",
                        devname);
                return 1;
        }

        if (fd == -1) {
                /* we're doing autolayout so grab the pre-marked (in
                 * validate_geometry) raid_disk
                 */
                for (dl = super->disks; dl; dl = dl->next)
                        if (dl->raiddisk == dk->raid_disk)
                                break;
        } else {
                /* explicit device: match on device number */
                for (dl = super->disks; dl ; dl = dl->next)
                        if (dl->major == dk->major &&
                            dl->minor == dk->minor)
                                break;
        }

        if (!dl) {
                pr_err("%s is not a member of the same container\n", devname);
                return 1;
        }

        /* add a pristine spare to the metadata */
        if (dl->index < 0) {
                dl->index = super->anchor->num_disks;
                super->anchor->num_disks++;
        }
        /* Check the device has not already been added */
        slot = get_imsm_disk_slot(map, dl->index);
        if (slot >= 0 &&
            (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) {
                pr_err("%s has been included in this array twice\n",
                        devname);
                return 1;
        }
        /* storing the bare index clears IMSM_ORD_REBUILD for this slot */
        set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index);
        dl->disk.status = CONFIGURED_DISK;

        /* update size of 'missing' disks to be at least as large as the
         * largest active member (we only have dummy missing disks when
         * creating the first volume)
         */
        if (super->current_vol == 0) {
                for (df = super->missing; df; df = df->next) {
                        if (total_blocks(&dl->disk) > total_blocks(&df->disk))
                                set_total_blocks(&df->disk, total_blocks(&dl->disk));
                        /* keep the anchor's copy of the missing disk in sync */
                        _disk = __get_imsm_disk(mpb, df->index);
                        *_disk = df->disk;
                }
        }

        /* refresh unset/failed slots to point to valid 'missing' entries */
        for (df = super->missing; df; df = df->next)
                for (slot = 0; slot < mpb->num_disks; slot++) {
                        __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);

                        /* slot already holds a real member */
                        if ((ord & IMSM_ORD_REBUILD) == 0)
                                continue;
                        set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD);
                        if (is_gen_migration(dev)) {
                                /* mirror the change into the second map too */
                                struct imsm_map *map2 = get_imsm_map(dev,
                                                                     MAP_1);
                                int slot2 = get_imsm_disk_slot(map2, df->index);
                                if ((slot2 < map2->num_members) &&
                                    (slot2 >= 0)) {
                                        __u32 ord2 = get_imsm_ord_tbl_ent(dev,
                                                                         slot2,
                                                                         MAP_1);
                                        if ((unsigned)df->index ==
                                                               ord_to_idx(ord2))
                                                set_imsm_ord_tbl_ent(map2,
                                                        slot2,
                                                        df->index |
                                                        IMSM_ORD_REBUILD);
                                }
                        }
                        dprintf("set slot:%d to missing disk:%d\n", slot, df->index);
                        /* one slot per missing disk: move on to the next df */
                        break;
                }

        /* if we are creating the first raid device update the family number */
        if (super->current_vol == 0) {
                __u32 sum;
                struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);

                _disk = __get_imsm_disk(mpb, dl->index);
                if (!_dev || !_disk) {
                        pr_err("BUG mpb setup error\n");
                        return 1;
                }
                /* copy the in-memory dev/disk into the anchor so that the
                 * checksum below covers them
                 */
                *_dev = *dev;
                *_disk = dl->disk;
                /* family number = random value + checksum of the anchor */
                sum = random32();
                sum += __gen_imsm_checksum(mpb);
                mpb->family_num = __cpu_to_le32(sum);
                mpb->orig_family_num = mpb->family_num;
        }
        super->current_disk = dl;
        return 0;
}
4921
4922 /* mark_spare()
4923  *   Function marks disk as spare and restores disk serial
4924  *   in case it was previously marked as failed by takeover operation
4925  * reruns:
4926  *   -1 : critical error
4927  *    0 : disk is marked as spare but serial is not set
4928  *    1 : success
4929  */
4930 int mark_spare(struct dl *disk)
4931 {
4932         __u8 serial[MAX_RAID_SERIAL_LEN];
4933         int ret_val = -1;
4934
4935         if (!disk)
4936                 return ret_val;
4937
4938         ret_val = 0;
4939         if (!imsm_read_serial(disk->fd, NULL, serial)) {
4940                 /* Restore disk serial number, because takeover marks disk
4941                  * as failed and adds to serial ':0' before it becomes
4942                  * a spare disk.
4943                  */
4944                 serialcpy(disk->serial, serial);
4945                 serialcpy(disk->disk.serial, serial);
4946                 ret_val = 1;
4947         }
4948         disk->disk.status = SPARE_DISK;
4949         disk->index = -1;
4950
4951         return ret_val;
4952 }
4953
4954 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
4955                              int fd, char *devname,
4956                              unsigned long long data_offset)
4957 {
4958         struct intel_super *super = st->sb;
4959         struct dl *dd;
4960         unsigned long long size;
4961         __u32 id;
4962         int rv;
4963         struct stat stb;
4964
4965         /* If we are on an RAID enabled platform check that the disk is
4966          * attached to the raid controller.
4967          * We do not need to test disks attachment for container based additions,
4968          * they shall be already tested when container was created/assembled.
4969          */
4970         rv = find_intel_hba_capability(fd, super, devname);
4971         /* no orom/efi or non-intel hba of the disk */
4972         if (rv != 0) {
4973                 dprintf("capability: %p fd: %d ret: %d\n",
4974                         super->orom, fd, rv);
4975                 return 1;
4976         }
4977
4978         if (super->current_vol >= 0)
4979                 return add_to_super_imsm_volume(st, dk, fd, devname);
4980
4981         fstat(fd, &stb);
4982         dd = xcalloc(sizeof(*dd), 1);
4983         dd->major = major(stb.st_rdev);
4984         dd->minor = minor(stb.st_rdev);
4985         dd->devname = devname ? xstrdup(devname) : NULL;
4986         dd->fd = fd;
4987         dd->e = NULL;
4988         dd->action = DISK_ADD;
4989         rv = imsm_read_serial(fd, devname, dd->serial);
4990         if (rv) {
4991                 pr_err("failed to retrieve scsi serial, aborting\n");
4992                 free(dd);
4993                 abort();
4994         }
4995
4996         get_dev_size(fd, NULL, &size);
4997         size /= 512;
4998         serialcpy(dd->disk.serial, dd->serial);
4999         set_total_blocks(&dd->disk, size);
5000         if (__le32_to_cpu(dd->disk.total_blocks_hi) > 0) {
5001                 struct imsm_super *mpb = super->anchor;
5002                 mpb->attributes |= MPB_ATTRIB_2TB_DISK;
5003         }
5004         mark_spare(dd);
5005         if (sysfs_disk_to_scsi_id(fd, &id) == 0)
5006                 dd->disk.scsi_id = __cpu_to_le32(id);
5007         else
5008                 dd->disk.scsi_id = __cpu_to_le32(0);
5009
5010         if (st->update_tail) {
5011                 dd->next = super->disk_mgmt_list;
5012                 super->disk_mgmt_list = dd;
5013         } else {
5014                 dd->next = super->disks;
5015                 super->disks = dd;
5016                 super->updates_pending++;
5017         }
5018
5019         return 0;
5020 }
5021
5022
5023 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
5024 {
5025         struct intel_super *super = st->sb;
5026         struct dl *dd;
5027
5028         /* remove from super works only in mdmon - for communication
5029          * manager - monitor. Check if communication memory buffer
5030          * is prepared.
5031          */
5032         if (!st->update_tail) {
5033                 pr_err("%s shall be used in mdmon context only"
5034                        "(line %d).\n", __func__, __LINE__);
5035                 return 1;
5036         }
5037         dd = xcalloc(1, sizeof(*dd));
5038         dd->major = dk->major;
5039         dd->minor = dk->minor;
5040         dd->fd = -1;
5041         mark_spare(dd);
5042         dd->action = DISK_REMOVE;
5043
5044         dd->next = super->disk_mgmt_list;
5045         super->disk_mgmt_list = dd;
5046
5047
5048         return 0;
5049 }
5050
/* forward declaration; the definition is not in this part of the file */
static int store_imsm_mpb(int fd, struct imsm_super *mpb);

/* Statically allocated, 512-byte aligned scratch buffer that is viewed as
 * an imsm anchor; write_super_imsm_spares() fills it in and hands it to
 * store_imsm_mpb() once per spare disk.
 */
static union {
        char buf[512];
        struct imsm_super anchor;
} spare_record __attribute__ ((aligned(512)));
5057
5058 /* spare records have their own family number and do not have any defined raid
5059  * devices
5060  */
5061 static int write_super_imsm_spares(struct intel_super *super, int doclose)
5062 {
5063         struct imsm_super *mpb = super->anchor;
5064         struct imsm_super *spare = &spare_record.anchor;
5065         __u32 sum;
5066         struct dl *d;
5067
5068         spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
5069         spare->generation_num = __cpu_to_le32(1UL),
5070         spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
5071         spare->num_disks = 1,
5072         spare->num_raid_devs = 0,
5073         spare->cache_size = mpb->cache_size,
5074         spare->pwr_cycle_count = __cpu_to_le32(1),
5075
5076         snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
5077                  MPB_SIGNATURE MPB_VERSION_RAID0);
5078
5079         for (d = super->disks; d; d = d->next) {
5080                 if (d->index != -1)
5081                         continue;
5082
5083                 spare->disk[0] = d->disk;
5084                 if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
5085                         spare->attributes |= MPB_ATTRIB_2TB_DISK;
5086
5087                 sum = __gen_imsm_checksum(spare);
5088                 spare->family_num = __cpu_to_le32(sum);
5089                 spare->orig_family_num = 0;
5090                 sum = __gen_imsm_checksum(spare);
5091                 spare->check_sum = __cpu_to_le32(sum);
5092
5093                 if (store_imsm_mpb(d->fd, spare)) {
5094                         fprintf(stderr, "%s: failed for device %d:%d %s\n",
5095                                 __func__, d->major, d->minor, strerror(errno));
5096                         return 1;
5097                 }
5098                 if (doclose) {
5099                         close(d->fd);
5100                         d->fd = -1;
5101                 }
5102         }
5103
5104         return 0;
5105 }
5106
5107 static int write_super_imsm(struct supertype *st, int doclose)
5108 {
5109         struct intel_super *super = st->sb;
5110         struct imsm_super *mpb = super->anchor;
5111         struct dl *d;
5112         __u32 generation;
5113         __u32 sum;
5114         int spares = 0;
5115         int i;
5116         __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
5117         int num_disks = 0;
5118         int clear_migration_record = 1;
5119
5120         /* 'generation' is incremented everytime the metadata is written */
5121         generation = __le32_to_cpu(mpb->generation_num);
5122         generation++;
5123         mpb->generation_num = __cpu_to_le32(generation);
5124
5125         /* fix up cases where previous mdadm releases failed to set
5126          * orig_family_num
5127          */
5128         if (mpb->orig_family_num == 0)
5129                 mpb->orig_family_num = mpb->family_num;
5130
5131         for (d = super->disks; d; d = d->next) {
5132                 if (d->index == -1)
5133                         spares++;
5134                 else {
5135                         mpb->disk[d->index] = d->disk;
5136                         num_disks++;
5137                 }
5138         }
5139         for (d = super->missing; d; d = d->next) {
5140                 mpb->disk[d->index] = d->disk;
5141                 num_disks++;
5142         }
5143         mpb->num_disks = num_disks;
5144         mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
5145
5146         for (i = 0; i < mpb->num_raid_devs; i++) {
5147                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
5148                 struct imsm_dev *dev2 = get_imsm_dev(super, i);
5149                 if (dev && dev2) {
5150                         imsm_copy_dev(dev, dev2);
5151                         mpb_size += sizeof_imsm_dev(dev, 0);
5152                 }
5153                 if (is_gen_migration(dev2))
5154                         clear_migration_record = 0;
5155         }
5156         mpb_size += __le32_to_cpu(mpb->bbm_log_size);
5157         mpb->mpb_size = __cpu_to_le32(mpb_size);
5158
5159         /* recalculate checksum */
5160         sum = __gen_imsm_checksum(mpb);
5161         mpb->check_sum = __cpu_to_le32(sum);
5162
5163         if (super->clean_migration_record_by_mdmon) {
5164                 clear_migration_record = 1;
5165                 super->clean_migration_record_by_mdmon = 0;
5166         }
5167         if (clear_migration_record)
5168                 memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
5169
5170         /* write the mpb for disks that compose raid devices */
5171         for (d = super->disks; d ; d = d->next) {
5172                 if (d->index < 0 || is_failed(&d->disk))
5173                         continue;
5174
5175                 if (clear_migration_record) {
5176                         unsigned long long dsize;
5177
5178                         get_dev_size(d->fd, NULL, &dsize);
5179                         if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
5180                                 if (write(d->fd, super->migr_rec_buf,
5181                                         MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
5182                                         perror("Write migr_rec failed");
5183                         }
5184                 }
5185
5186                 if (store_imsm_mpb(d->fd, mpb))
5187                         fprintf(stderr,
5188                                 "%s: failed for device %d:%d (fd: %d)%s\n",
5189                                 __func__, d->major, d->minor,
5190                                 d->fd, strerror(errno));
5191
5192                 if (doclose) {
5193                         close(d->fd);
5194                         d->fd = -1;
5195                 }
5196         }
5197
5198         if (spares)
5199                 return write_super_imsm_spares(super, doclose);
5200
5201         return 0;
5202 }
5203
5204
5205 static int create_array(struct supertype *st, int dev_idx)
5206 {
5207         size_t len;
5208         struct imsm_update_create_array *u;
5209         struct intel_super *super = st->sb;
5210         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
5211         struct imsm_map *map = get_imsm_map(dev, MAP_0);
5212         struct disk_info *inf;
5213         struct imsm_disk *disk;
5214         int i;
5215
5216         len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
5217               sizeof(*inf) * map->num_members;
5218         u = xmalloc(len);
5219         u->type = update_create_array;
5220         u->dev_idx = dev_idx;
5221         imsm_copy_dev(&u->dev, dev);
5222         inf = get_disk_info(u);
5223         for (i = 0; i < map->num_members; i++) {
5224                 int idx = get_imsm_disk_idx(dev, i, MAP_X);
5225
5226                 disk = get_imsm_disk(super, idx);
5227                 serialcpy(inf[i].serial, disk->serial);
5228         }
5229         append_metadata_update(st, u, len);
5230
5231         return 0;
5232 }
5233
5234 static int mgmt_disk(struct supertype *st)
5235 {
5236         struct intel_super *super = st->sb;
5237         size_t len;
5238         struct imsm_update_add_remove_disk *u;
5239
5240         if (!super->disk_mgmt_list)
5241                 return 0;
5242
5243         len = sizeof(*u);
5244         u = xmalloc(len);
5245         u->type = update_add_remove_disk;
5246         append_metadata_update(st, u, len);
5247
5248         return 0;
5249 }
5250
5251 static int write_init_super_imsm(struct supertype *st)
5252 {
5253         struct intel_super *super = st->sb;
5254         int current_vol = super->current_vol;
5255
5256         /* we are done with current_vol reset it to point st at the container */
5257         super->current_vol = -1;
5258
5259         if (st->update_tail) {
5260                 /* queue the recently created array / added disk
5261                  * as a metadata update */
5262                 int rv;
5263
5264                 /* determine if we are creating a volume or adding a disk */
5265                 if (current_vol < 0) {
5266                         /* in the mgmt (add/remove) disk case we are running
5267                          * in mdmon context, so don't close fd's
5268                          */
5269                         return mgmt_disk(st);
5270                 } else
5271                         rv = create_array(st, current_vol);
5272
5273                 return rv;
5274         } else {
5275                 struct dl *d;
5276                 for (d = super->disks; d; d = d->next)
5277                         Kill(d->devname, NULL, 0, -1, 1);
5278                 return write_super_imsm(st, 1);
5279         }
5280 }
5281 #endif
5282
5283 static int store_super_imsm(struct supertype *st, int fd)
5284 {
5285         struct intel_super *super = st->sb;
5286         struct imsm_super *mpb = super ? super->anchor : NULL;
5287
5288         if (!mpb)
5289                 return 1;
5290
5291 #ifndef MDASSEMBLE
5292         return store_imsm_mpb(fd, mpb);
5293 #else
5294         return 1;
5295 #endif
5296 }
5297
5298 static int imsm_bbm_log_size(struct imsm_super *mpb)
5299 {
5300         return __le32_to_cpu(mpb->bbm_log_size);
5301 }
5302
5303 #ifndef MDASSEMBLE
5304 static int validate_geometry_imsm_container(struct supertype *st, int level,
5305                                             int layout, int raiddisks, int chunk,
5306                                             unsigned long long size,
5307                                             unsigned long long data_offset,
5308                                             char *dev,
5309                                             unsigned long long *freesize,
5310                                             int verbose)
5311 {
5312         int fd;
5313         unsigned long long ldsize;
5314         struct intel_super *super=NULL;
5315         int rv = 0;
5316
5317         if (level != LEVEL_CONTAINER)
5318                 return 0;
5319         if (!dev)
5320                 return 1;
5321
5322         fd = open(dev, O_RDONLY|O_EXCL, 0);
5323         if (fd < 0) {
5324                 if (verbose > 0)
5325                         pr_err("imsm: Cannot open %s: %s\n",
5326                                 dev, strerror(errno));
5327                 return 0;
5328         }
5329         if (!get_dev_size(fd, dev, &ldsize)) {
5330                 close(fd);
5331                 return 0;
5332         }
5333
5334         /* capabilities retrieve could be possible
5335          * note that there is no fd for the disks in array.
5336          */
5337         super = alloc_super();
5338         rv = find_intel_hba_capability(fd, super, verbose > 0 ? dev : NULL);
5339         if (rv != 0) {
5340 #if DEBUG
5341                 char str[256];
5342                 fd2devname(fd, str);
5343                 dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
5344                         fd, str, super->orom, rv, raiddisks);
5345 #endif
5346                 /* no orom/efi or non-intel hba of the disk */
5347                 close(fd);
5348                 free_imsm(super);
5349                 return 0;
5350         }
5351         close(fd);
5352         if (super->orom) {
5353                 if (raiddisks > super->orom->tds) {
5354                         if (verbose)
5355                                 pr_err("%d exceeds maximum number of"
5356                                         " platform supported disks: %d\n",
5357                                         raiddisks, super->orom->tds);
5358                         free_imsm(super);
5359                         return 0;
5360                 }
5361                 if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 &&
5362                     (ldsize >> 9) >> 32 > 0) {
5363                         if (verbose)
5364                                 pr_err("%s exceeds maximum platform supported size\n", dev);
5365                         free_imsm(super);
5366                         return 0;
5367                 }
5368         }
5369
5370         *freesize = avail_size_imsm(st, ldsize >> 9, data_offset);
5371         free_imsm(super);
5372
5373         return 1;
5374 }
5375
5376 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
5377 {
5378         const unsigned long long base_start = e[*idx].start;
5379         unsigned long long end = base_start + e[*idx].size;
5380         int i;
5381
5382         if (base_start == end)
5383                 return 0;
5384
5385         *idx = *idx + 1;
5386         for (i = *idx; i < num_extents; i++) {
5387                 /* extend overlapping extents */
5388                 if (e[i].start >= base_start &&
5389                     e[i].start <= end) {
5390                         if (e[i].size == 0)
5391                                 return 0;
5392                         if (e[i].start + e[i].size > end)
5393                                 end = e[i].start + e[i].size;
5394                 } else if (e[i].start > end) {
5395                         *idx = i;
5396                         break;
5397                 }
5398         }
5399
5400         return end - base_start;
5401 }
5402
5403 static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
5404 {
5405         /* build a composite disk with all known extents and generate a new
5406          * 'maxsize' given the "all disks in an array must share a common start
5407          * offset" constraint
5408          */
5409         struct extent *e = xcalloc(sum_extents, sizeof(*e));
5410         struct dl *dl;
5411         int i, j;
5412         int start_extent;
5413         unsigned long long pos;
5414         unsigned long long start = 0;
5415         unsigned long long maxsize;
5416         unsigned long reserve;
5417
5418         /* coalesce and sort all extents. also, check to see if we need to
5419          * reserve space between member arrays
5420          */
5421         j = 0;
5422         for (dl = super->disks; dl; dl = dl->next) {
5423                 if (!dl->e)
5424                         continue;
5425                 for (i = 0; i < dl->extent_cnt; i++)
5426                         e[j++] = dl->e[i];
5427         }
5428         qsort(e, sum_extents, sizeof(*e), cmp_extent);
5429
5430         /* merge extents */
5431         i = 0;
5432         j = 0;
5433         while (i < sum_extents) {
5434                 e[j].start = e[i].start;
5435                 e[j].size = find_size(e, &i, sum_extents);
5436                 j++;
5437                 if (e[j-1].size == 0)
5438                         break;
5439         }
5440
5441         pos = 0;
5442         maxsize = 0;
5443         start_extent = 0;
5444         i = 0;
5445         do {
5446                 unsigned long long esize;
5447
5448                 esize = e[i].start - pos;
5449                 if (esize >= maxsize) {
5450                         maxsize = esize;
5451                         start = pos;
5452                         start_extent = i;
5453                 }
5454                 pos = e[i].start + e[i].size;
5455                 i++;
5456         } while (e[i-1].size);
5457         free(e);
5458
5459         if (maxsize == 0)
5460                 return 0;
5461
5462         /* FIXME assumes volume at offset 0 is the first volume in a
5463          * container
5464          */
5465         if (start_extent > 0)
5466                 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
5467         else
5468                 reserve = 0;
5469
5470         if (maxsize < reserve)
5471                 return 0;
5472
5473         super->create_offset = ~((unsigned long long) 0);
5474         if (start + reserve > super->create_offset)
5475                 return 0; /* start overflows create_offset */
5476         super->create_offset = start + reserve;
5477
5478         return maxsize - reserve;
5479 }
5480
/* is_raid_level_supported - check a raid level / disk count combination
 * @orom: option-rom capabilities, or NULL when no platform was detected
 * @level: requested raid level
 * @raiddisks: requested number of member disks
 *
 * Returns non-zero when the combination is acceptable, 0 otherwise.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	/* negative levels, raid4 and raid6 are rejected unconditionally */
	if (level < 0 || level == 4 || level == 6)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	/* we have an orom: only allow what it advertises */
	if (level == 0)
		return imsm_orom_has_raid0(orom);

	if (level == 1) {
		/* more than two disks is checked against the raid1e capability */
		if (raiddisks > 2)
			return imsm_orom_has_raid1e(orom);
		return imsm_orom_has_raid1(orom) && raiddisks == 2;
	}

	if (level == 5)
		return imsm_orom_has_raid5(orom) && raiddisks > 2;

	if (level == 10)
		return imsm_orom_has_raid10(orom) && raiddisks == 4;

	/* any other level is not supported when an orom is present */
	return 0;
}
5502
5503
5504 static int
5505 active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
5506                         int dpa, int verbose)
5507 {
5508         struct mdstat_ent *mdstat = mdstat_read(0, 0);
5509         struct mdstat_ent *memb = NULL;
5510         int count = 0;
5511         int num = 0;
5512         struct md_list *dv = NULL;
5513         int found;
5514
5515         for (memb = mdstat ; memb ; memb = memb->next) {
5516                 if (memb->metadata_version &&
5517                     (strncmp(memb->metadata_version, "external:", 9) == 0)  &&
5518                     (strcmp(&memb->metadata_version[9], name) == 0) &&
5519                     !is_subarray(memb->metadata_version+9) &&
5520                     memb->members) {
5521                         struct dev_member *dev = memb->members;
5522                         int fd = -1;
5523                         while(dev && (fd < 0)) {
5524                                 char *path = xmalloc(strlen(dev->name) + strlen("/dev/") + 1);
5525                                 num = sprintf(path, "%s%s", "/dev/", dev->name);
5526                                 if (num > 0)
5527                                         fd = open(path, O_RDONLY, 0);
5528                                 if ((num <= 0) || (fd < 0)) {
5529                                         pr_vrb(": Cannot open %s: %s\n",
5530                                                dev->name, strerror(errno));
5531                                 }
5532                                 free(path);
5533                                 dev = dev->next;
5534                         }
5535                         found = 0;
5536                         if ((fd >= 0) && disk_attached_to_hba(fd, hba)) {
5537                                 struct mdstat_ent *vol;
5538                                 for (vol = mdstat ; vol ; vol = vol->next) {
5539                                         if ((vol->active > 0) &&
5540                                             vol->metadata_version &&
5541                                             is_container_member(vol, memb->dev)) {
5542                                                 found++;
5543                                                 count++;
5544                                         }
5545                                 }
5546                                 if (*devlist && (found < dpa)) {
5547                                         dv = xcalloc(1, sizeof(*dv));
5548                                         dv->devname = xmalloc(strlen(memb->dev) + strlen("/dev/") + 1);
5549                                         sprintf(dv->devname, "%s%s", "/dev/", memb->dev);
5550                                         dv->found = found;
5551                                         dv->used = 0;
5552                                         dv->next = *devlist;
5553                                         *devlist = dv;
5554                                 }
5555                         }
5556                         if (fd >= 0)
5557                                 close(fd);
5558                 }
5559         }
5560         free_mdstat(mdstat);
5561         return count;
5562 }
5563
#ifdef DEBUG_LOOP
/* get_loop_devices - debug helper: build a device list of /dev/loop0..11
 *
 * Entries are prepended, so the returned list starts with /dev/loop11.
 * Each node and its devname are heap allocated.
 */
static struct md_list*
get_loop_devices(void)
{
	struct md_list *head = NULL;
	int idx = 0;

	while (idx < 12) {
		struct md_list *node = xcalloc(1, sizeof(*node));

		node->devname = xmalloc(40);
		sprintf(node->devname, "/dev/loop%d", idx);
		node->next = head;
		head = node;
		idx++;
	}
	return head;
}
#endif
5582
5583 static struct md_list*
5584 get_devices(const char *hba_path)
5585 {
5586         struct md_list *devlist = NULL;
5587         struct md_list *dv = NULL;
5588         struct dirent *ent;
5589         DIR *dir;
5590         int err = 0;
5591
5592 #if DEBUG_LOOP
5593         devlist = get_loop_devices();
5594         return devlist;
5595 #endif
5596         /* scroll through /sys/dev/block looking for devices attached to
5597          * this hba
5598          */
5599         dir = opendir("/sys/dev/block");
5600         for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
5601                 int fd;
5602                 char buf[1024];
5603                 int major, minor;
5604                 char *path = NULL;
5605                 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
5606                         continue;
5607                 path = devt_to_devpath(makedev(major, minor));
5608                 if (!path)
5609                         continue;
5610                 if (!path_attached_to_hba(path, hba_path)) {
5611                         free(path);
5612                         path = NULL;
5613                         continue;
5614                 }
5615                 free(path);
5616                 path = NULL;
5617                 fd = dev_open(ent->d_name, O_RDONLY);
5618                 if (fd >= 0) {
5619                         fd2devname(fd, buf);
5620                         close(fd);
5621                 } else {
5622                         pr_err("cannot open device: %s\n",
5623                                 ent->d_name);
5624                         continue;
5625                 }
5626
5627
5628                 dv = xcalloc(1, sizeof(*dv));
5629                 dv->devname = xstrdup(buf);
5630                 dv->next = devlist;
5631                 devlist = dv;
5632         }
5633         if (err) {
5634                 while(devlist) {
5635                         dv = devlist;
5636                         devlist = devlist->next;
5637                         free(dv->devname);
5638                         free(dv);
5639                 }
5640         }
5641         closedir(dir);
5642         return devlist;
5643 }
5644
5645 static int
5646 count_volumes_list(struct md_list *devlist, char *homehost,
5647                    int verbose, int *found)
5648 {
5649         struct md_list *tmpdev;
5650         int count = 0;
5651         struct supertype *st = NULL;
5652
5653         /* first walk the list of devices to find a consistent set
5654          * that match the criterea, if that is possible.
5655          * We flag the ones we like with 'used'.
5656          */
5657         *found = 0;
5658         st = match_metadata_desc_imsm("imsm");
5659         if (st == NULL) {
5660                 pr_vrb(": cannot allocate memory for imsm supertype\n");
5661                 return 0;
5662         }
5663
5664         for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
5665                 char *devname = tmpdev->devname;
5666                 struct stat stb;
5667                 struct supertype *tst;
5668                 int dfd;
5669                 if (tmpdev->used > 1)
5670                         continue;
5671                 tst = dup_super(st);
5672                 if (tst == NULL) {
5673                         pr_vrb(": cannot allocate memory for imsm supertype\n");
5674                         goto err_1;
5675                 }
5676                 tmpdev->container = 0;
5677                 dfd = dev_open(devname, O_RDONLY|O_EXCL);
5678                 if (dfd < 0) {
5679                         dprintf(": cannot open device %s: %s\n",
5680                                 devname, strerror(errno));
5681                         tmpdev->used = 2;
5682                 } else if (fstat(dfd, &stb)< 0) {
5683                         /* Impossible! */
5684                         dprintf(": fstat failed for %s: %s\n",
5685                                 devname, strerror(errno));
5686                         tmpdev->used = 2;
5687                 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
5688                         dprintf(": %s is not a block device.\n",
5689                                 devname);
5690                         tmpdev->used = 2;
5691                 } else if (must_be_container(dfd)) {
5692                         struct supertype *cst;
5693                         cst = super_by_fd(dfd, NULL);
5694                         if (cst == NULL) {
5695                                 dprintf(": cannot recognize container type %s\n",
5696                                         devname);
5697                                 tmpdev->used = 2;
5698                         } else if (tst->ss != st->ss) {
5699                                 dprintf(": non-imsm container - ignore it: %s\n",
5700                                         devname);
5701                                 tmpdev->used = 2;
5702                         } else if (!tst->ss->load_container ||
5703                                    tst->ss->load_container(tst, dfd, NULL))
5704                                 tmpdev->used = 2;
5705                         else {
5706                                 tmpdev->container = 1;
5707                         }
5708                         if (cst)
5709                                 cst->ss->free_super(cst);
5710                 } else {
5711                         tmpdev->st_rdev = stb.st_rdev;
5712                         if (tst->ss->load_super(tst,dfd, NULL)) {
5713                                 dprintf(": no RAID superblock on %s\n",
5714                                         devname);
5715                                 tmpdev->used = 2;
5716                         } else if (tst->ss->compare_super == NULL) {
5717                                 dprintf(": Cannot assemble %s metadata on %s\n",
5718                                         tst->ss->name, devname);
5719                                 tmpdev->used = 2;
5720                         }
5721                 }
5722                 if (dfd >= 0)
5723                         close(dfd);
5724                 if (tmpdev->used == 2 || tmpdev->used == 4) {
5725                         /* Ignore unrecognised devices during auto-assembly */
5726                         goto loop;
5727                 }
5728                 else {
5729                         struct mdinfo info;
5730                         tst->ss->getinfo_super(tst, &info, NULL);
5731
5732                         if (st->minor_version == -1)
5733                                 st->minor_version = tst->minor_version;
5734
5735                         if (memcmp(info.uuid, uuid_zero,
5736                                    sizeof(int[4])) == 0) {
5737                                 /* this is a floating spare.  It cannot define
5738                                  * an array unless there are no more arrays of
5739                                  * this type to be found.  It can be included
5740                                  * in an array of this type though.
5741                                  */
5742                                 tmpdev->used = 3;
5743                                 goto loop;
5744                         }
5745
5746                         if (st->ss != tst->ss ||
5747                             st->minor_version != tst->minor_version ||
5748                             st->ss->compare_super(st, tst) != 0) {
5749                                 /* Some mismatch. If exactly one array matches this host,
5750                                  * we can resolve on that one.
5751                                  * Or, if we are auto assembling, we just ignore the second
5752                                  * for now.
5753                                  */
5754                                 dprintf(": superblock on %s doesn't match others - assembly aborted\n",
5755                                         devname);
5756                                 goto loop;
5757                         }
5758                         tmpdev->used = 1;
5759                         *found = 1;
5760                         dprintf("found: devname: %s\n", devname);
5761                 }
5762         loop:
5763                 if (tst)
5764                         tst->ss->free_super(tst);
5765         }
5766         if (*found != 0) {
5767                 int err;
5768                 if ((err = load_super_imsm_all(st, -1, &st->sb, NULL, devlist, 0)) == 0) {
5769                         struct mdinfo *iter, *head = st->ss->container_content(st, NULL);
5770                         for (iter = head; iter; iter = iter->next) {
5771                                 dprintf("content->text_version: %s vol\n",
5772                                         iter->text_version);
5773                                 if (iter->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
5774                                         /* do not assemble arrays with unsupported
5775                                            configurations */
5776                                         dprintf(": Cannot activate member %s.\n",
5777                                                 iter->text_version);
5778                                 } else
5779                                         count++;
5780                         }
5781                         sysfs_free(head);
5782
5783                 } else {
5784                         dprintf(" no valid super block on device list: err: %d %p\n",
5785                                 err, st->sb);
5786                 }
5787         } else {
5788                 dprintf(" no more devices to examin\n");
5789         }
5790
5791         for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
5792                 if ((tmpdev->used == 1) && (tmpdev->found)) {
5793                         if (count) {
5794                                 if (count < tmpdev->found)
5795                                         count = 0;
5796                                 else
5797                                         count -= tmpdev->found;
5798                         }
5799                 }
5800                 if (tmpdev->used == 1)
5801                         tmpdev->used = 4;
5802         }
5803         err_1:
5804         if (st)
5805                 st->ss->free_super(st);
5806         return count;
5807 }
5808
5809
5810 static int
5811 count_volumes(char *hba, int dpa, int verbose)
5812 {
5813         struct md_list *devlist = NULL;
5814         int count = 0;
5815         int found = 0;;
5816
5817         devlist = get_devices(hba);
5818         /* if no intel devices return zero volumes */
5819         if (devlist == NULL)
5820                 return 0;
5821
5822         count = active_arrays_by_format("imsm", hba, &devlist, dpa, verbose);
5823         dprintf(" path: %s active arrays: %d\n", hba, count);
5824         if (devlist == NULL)
5825                 return 0;
5826         do  {
5827                 found = 0;
5828                 count += count_volumes_list(devlist,
5829                                             NULL,
5830                                             verbose,
5831                                             &found);
5832                 dprintf("found %d count: %d\n", found, count);
5833         } while (found);
5834
5835         dprintf("path: %s total number of volumes: %d\n", hba, count);
5836
5837         while(devlist) {
5838                 struct md_list *dv = devlist;
5839                 devlist = devlist->next;
5840                 free(dv->devname);
5841                 free(dv);
5842         }
5843         return count;
5844 }
5845
5846 static int imsm_default_chunk(const struct imsm_orom *orom)
5847 {
5848         /* up to 512 if the plaform supports it, otherwise the platform max.
5849          * 128 if no platform detected
5850          */
5851         int fs = max(7, orom ? fls(orom->sss) : 0);
5852
5853         return min(512, (1 << fs));
5854 }
5855
5856 static int
5857 validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
5858                             int raiddisks, int *chunk, unsigned long long size, int verbose)
5859 {
5860         /* check/set platform and metadata limits/defaults */
5861         if (super->orom && raiddisks > super->orom->dpa) {
5862                 pr_vrb(": platform supports a maximum of %d disks per array\n",
5863                        super->orom->dpa);
5864                 return 0;
5865         }
5866
5867         /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
5868         if (!is_raid_level_supported(super->orom, level, raiddisks)) {
5869                 pr_vrb(": platform does not support raid%d with %d disk%s\n",
5870                         level, raiddisks, raiddisks > 1 ? "s" : "");
5871                 return 0;
5872         }
5873
5874         if (chunk && (*chunk == 0 || *chunk == UnSet))
5875                 *chunk = imsm_default_chunk(super->orom);
5876
5877         if (super->orom && chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
5878                 pr_vrb(": platform does not support a chunk size of: "
5879                        "%d\n", *chunk);
5880                 return 0;
5881         }
5882
5883         if (layout != imsm_level_to_layout(level)) {
5884                 if (level == 5)
5885                         pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
5886                 else if (level == 10)
5887                         pr_vrb(": imsm raid 10 only supports the n2 layout\n");
5888                 else
5889                         pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
5890                                 layout, level);
5891                 return 0;
5892         }
5893
5894         if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 && chunk &&
5895                         (calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) {
5896                 pr_vrb(": platform does not support a volume size over 2TB\n");
5897                 return 0;
5898         }
5899         return 1;
5900 }
5901
5902 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
5903  * FIX ME add ahci details
5904  */
5905 static int validate_geometry_imsm_volume(struct supertype *st, int level,
5906                                          int layout, int raiddisks, int *chunk,
5907                                          unsigned long long size,
5908                                          unsigned long long data_offset,
5909                                          char *dev,
5910                                          unsigned long long *freesize,
5911                                          int verbose)
5912 {
5913         struct stat stb;
5914         struct intel_super *super = st->sb;
5915         struct imsm_super *mpb;
5916         struct dl *dl;
5917         unsigned long long pos = 0;
5918         unsigned long long maxsize;
5919         struct extent *e;
5920         int i;
5921
5922         /* We must have the container info already read in. */
5923         if (!super)
5924                 return 0;
5925
5926         mpb = super->anchor;
5927
5928         if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) {
5929                 pr_err("RAID gemetry validation failed. "
5930                         "Cannot proceed with the action(s).\n");
5931                 return 0;
5932         }
5933         if (!dev) {
5934                 /* General test:  make sure there is space for
5935                  * 'raiddisks' device extents of size 'size' at a given
5936                  * offset
5937                  */
5938                 unsigned long long minsize = size;
5939                 unsigned long long start_offset = MaxSector;
5940                 int dcnt = 0;
5941                 if (minsize == 0)
5942                         minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
5943                 for (dl = super->disks; dl ; dl = dl->next) {
5944                         int found = 0;
5945
5946                         pos = 0;
5947                         i = 0;
5948                         e = get_extents(super, dl);
5949                         if (!e) continue;
5950                         do {
5951                                 unsigned long long esize;
5952                                 esize = e[i].start - pos;
5953                                 if (esize >= minsize)
5954                                         found = 1;
5955                                 if (found && start_offset == MaxSector) {
5956                                         start_offset = pos;
5957                                         break;
5958                                 } else if (found && pos != start_offset) {
5959                                         found = 0;
5960                                         break;
5961                                 }
5962                                 pos = e[i].start + e[i].size;
5963                                 i++;
5964                         } while (e[i-1].size);
5965                         if (found)
5966                                 dcnt++;
5967                         free(e);
5968                 }
5969                 if (dcnt < raiddisks) {
5970                         if (verbose)
5971                                 pr_err("imsm: Not enough "
5972                                         "devices with space for this array "
5973                                         "(%d < %d)\n",
5974                                         dcnt, raiddisks);
5975                         return 0;
5976                 }
5977                 return 1;
5978         }
5979
5980         /* This device must be a member of the set */
5981         if (stat(dev, &stb) < 0)
5982                 return 0;
5983         if ((S_IFMT & stb.st_mode) != S_IFBLK)
5984                 return 0;
5985         for (dl = super->disks ; dl ; dl = dl->next) {
5986                 if (dl->major == (int)major(stb.st_rdev) &&
5987                     dl->minor == (int)minor(stb.st_rdev))
5988                         break;
5989         }
5990         if (!dl) {
5991                 if (verbose)
5992                         pr_err("%s is not in the "
5993                                 "same imsm set\n", dev);
5994                 return 0;
5995         } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
5996                 /* If a volume is present then the current creation attempt
5997                  * cannot incorporate new spares because the orom may not
5998                  * understand this configuration (all member disks must be
5999                  * members of each array in the container).
6000                  */
6001                 pr_err("%s is a spare and a volume"
6002                         " is already defined for this container\n", dev);
6003                 pr_err("The option-rom requires all member"
6004                         " disks to be a member of all volumes\n");
6005                 return 0;
6006         } else if (super->orom && mpb->num_raid_devs > 0 &&
6007                    mpb->num_disks != raiddisks) {
6008                 pr_err("The option-rom requires all member"
6009                         " disks to be a member of all volumes\n");
6010                 return 0;
6011         }
6012
6013         /* retrieve the largest free space block */
6014         e = get_extents(super, dl);
6015         maxsize = 0;
6016         i = 0;
6017         if (e) {
6018                 do {
6019                         unsigned long long esize;
6020
6021                         esize = e[i].start - pos;
6022                         if (esize >= maxsize)
6023                                 maxsize = esize;
6024                         pos = e[i].start + e[i].size;
6025                         i++;
6026                 } while (e[i-1].size);
6027                 dl->e = e;
6028                 dl->extent_cnt = i;
6029         } else {
6030                 if (verbose)
6031                         pr_err("unable to determine free space for: %s\n",
6032                                 dev);
6033                 return 0;
6034         }
6035         if (maxsize < size) {
6036                 if (verbose)
6037                         pr_err("%s not enough space (%llu < %llu)\n",
6038                                 dev, maxsize, size);
6039                 return 0;
6040         }
6041
6042         /* count total number of extents for merge */
6043         i = 0;
6044         for (dl = super->disks; dl; dl = dl->next)
6045                 if (dl->e)
6046                         i += dl->extent_cnt;
6047
6048         maxsize = merge_extents(super, i);
6049
6050         if (!check_env("IMSM_NO_PLATFORM") &&
6051             mpb->num_raid_devs > 0 && size && size != maxsize) {
6052                 pr_err("attempting to create a second "
6053                         "volume with size less then remaining space. "
6054                         "Aborting...\n");
6055                 return 0;
6056         }
6057
6058         if (maxsize < size || maxsize == 0) {
6059                 if (verbose) {
6060                         if (maxsize == 0)
6061                                 pr_err("no free space"
6062                                                 " left on device. Aborting...\n");
6063                         else
6064                                 pr_err("not enough space"
6065                                                 " to create volume of given size"
6066                                                 " (%llu < %llu). Aborting...\n",
6067                                                 maxsize, size);
6068                 }
6069                 return 0;
6070         }
6071
6072         *freesize = maxsize;
6073
6074         if (super->orom) {
6075                 int count = count_volumes(super->hba->path,
6076                                       super->orom->dpa, verbose);
6077                 if (super->orom->vphba <= count) {
6078                         pr_vrb(": platform does not support more than %d raid volumes.\n",
6079                                super->orom->vphba);
6080                         return 0;
6081                 }
6082         }
6083         return 1;
6084 }
6085
6086 static int imsm_get_free_size(struct supertype *st, int raiddisks,
6087                          unsigned long long size, int chunk,
6088                          unsigned long long *freesize)
6089 {
6090         struct intel_super *super = st->sb;
6091         struct imsm_super *mpb = super->anchor;
6092         struct dl *dl;
6093         int i;
6094         int extent_cnt;
6095         struct extent *e;
6096         unsigned long long maxsize;
6097         unsigned long long minsize;
6098         int cnt;
6099         int used;
6100
6101         /* find the largest common start free region of the possible disks */
6102         used = 0;
6103         extent_cnt = 0;
6104         cnt = 0;
6105         for (dl = super->disks; dl; dl = dl->next) {
6106                 dl->raiddisk = -1;
6107
6108                 if (dl->index >= 0)
6109                         used++;
6110
6111                 /* don't activate new spares if we are orom constrained
6112                  * and there is already a volume active in the container
6113                  */
6114                 if (super->orom && dl->index < 0 && mpb->num_raid_devs)
6115                         continue;
6116
6117                 e = get_extents(super, dl);
6118                 if (!e)
6119                         continue;
6120                 for (i = 1; e[i-1].size; i++)
6121                         ;
6122                 dl->e = e;
6123                 dl->extent_cnt = i;
6124                 extent_cnt += i;
6125                 cnt++;
6126         }
6127
6128         maxsize = merge_extents(super, extent_cnt);
6129         minsize = size;
6130         if (size == 0)
6131                 /* chunk is in K */
6132                 minsize = chunk * 2;
6133
6134         if (cnt < raiddisks ||
6135             (super->orom && used && used != raiddisks) ||
6136             maxsize < minsize ||
6137             maxsize == 0) {
6138                 pr_err("not enough devices with space to create array.\n");
6139                 return 0; /* No enough free spaces large enough */
6140         }
6141
6142         if (size == 0) {
6143                 size = maxsize;
6144                 if (chunk) {
6145                         size /= 2 * chunk;
6146                         size *= 2 * chunk;
6147                 }
6148                 maxsize = size;
6149         }
6150         if (!check_env("IMSM_NO_PLATFORM") &&
6151             mpb->num_raid_devs > 0 && size && size != maxsize) {
6152                 pr_err("attempting to create a second "
6153                         "volume with size less then remaining space. "
6154                         "Aborting...\n");
6155                 return 0;
6156         }
6157         cnt = 0;
6158         for (dl = super->disks; dl; dl = dl->next)
6159                 if (dl->e)
6160                         dl->raiddisk = cnt++;
6161
6162         *freesize = size;
6163
6164         dprintf("imsm: imsm_get_free_size() returns : %llu\n", size);
6165
6166         return 1;
6167 }
6168
6169 static int reserve_space(struct supertype *st, int raiddisks,
6170                          unsigned long long size, int chunk,
6171                          unsigned long long *freesize)
6172 {
6173         struct intel_super *super = st->sb;
6174         struct dl *dl;
6175         int cnt;
6176         int rv = 0;
6177
6178         rv = imsm_get_free_size(st, raiddisks, size, chunk, freesize);
6179         if (rv) {
6180                 cnt = 0;
6181                 for (dl = super->disks; dl; dl = dl->next)
6182                         if (dl->e)
6183                                 dl->raiddisk = cnt++;
6184                 rv = 1;
6185         }
6186
6187         return rv;
6188 }
6189
6190 static int validate_geometry_imsm(struct supertype *st, int level, int layout,
6191                                   int raiddisks, int *chunk, unsigned long long size,
6192                                   unsigned long long data_offset,
6193                                   char *dev, unsigned long long *freesize,
6194                                   int verbose)
6195 {
6196         int fd, cfd;
6197         struct mdinfo *sra;
6198         int is_member = 0;
6199
6200         /* load capability
6201          * if given unused devices create a container
6202          * if given given devices in a container create a member volume
6203          */
6204         if (level == LEVEL_CONTAINER) {
6205                 /* Must be a fresh device to add to a container */
6206                 return validate_geometry_imsm_container(st, level, layout,
6207                                                         raiddisks,
6208                                                         chunk?*chunk:0,
6209                                                         size, data_offset,
6210                                                         dev, freesize,
6211                                                         verbose);
6212         }
6213
6214         if (!dev) {
6215                 if (st->sb) {
6216                         struct intel_super *super = st->sb;
6217                         if (!validate_geometry_imsm_orom(st->sb, level, layout,
6218                                                          raiddisks, chunk, size,
6219                                                          verbose))
6220                                 return 0;
6221                         /* we are being asked to automatically layout a
6222                          * new volume based on the current contents of
6223                          * the container.  If the the parameters can be
6224                          * satisfied reserve_space will record the disks,
6225                          * start offset, and size of the volume to be
6226                          * created.  add_to_super and getinfo_super
6227                          * detect when autolayout is in progress.
6228                          */
6229                         /* assuming that freesize is always given when array is
6230                            created */
6231                         if (super->orom && freesize) {
6232                                 int count;
6233                                 count = count_volumes(super->hba->path,
6234                                                       super->orom->dpa, verbose);
6235                                 if (super->orom->vphba <= count) {
6236                                         pr_vrb(": platform does not support more"
6237                                                " than %d raid volumes.\n",
6238                                                super->orom->vphba);
6239                                         return 0;
6240                                 }
6241                         }
6242                         if (freesize)
6243                                 return reserve_space(st, raiddisks, size,
6244                                                      chunk?*chunk:0, freesize);
6245                 }
6246                 return 1;
6247         }
6248         if (st->sb) {
6249                 /* creating in a given container */
6250                 return validate_geometry_imsm_volume(st, level, layout,
6251                                                      raiddisks, chunk, size,
6252                                                      data_offset,
6253                                                      dev, freesize, verbose);
6254         }
6255
6256         /* This device needs to be a device in an 'imsm' container */
6257         fd = open(dev, O_RDONLY|O_EXCL, 0);
6258         if (fd >= 0) {
6259                 if (verbose)
6260                         pr_err("Cannot create this array on device %s\n",
6261                                dev);
6262                 close(fd);
6263                 return 0;
6264         }
6265         if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
6266                 if (verbose)
6267                         pr_err("Cannot open %s: %s\n",
6268                                 dev, strerror(errno));
6269                 return 0;
6270         }
6271         /* Well, it is in use by someone, maybe an 'imsm' container. */
6272         cfd = open_container(fd);
6273         close(fd);
6274         if (cfd < 0) {
6275                 if (verbose)
6276                         pr_err("Cannot use %s: It is busy\n",
6277                                 dev);
6278                 return 0;
6279         }
6280         sra = sysfs_read(cfd, NULL, GET_VERSION);
6281         if (sra && sra->array.major_version == -1 &&
6282             strcmp(sra->text_version, "imsm") == 0)
6283                 is_member = 1;
6284         sysfs_free(sra);
6285         if (is_member) {
6286                 /* This is a member of a imsm container.  Load the container
6287                  * and try to create a volume
6288                  */
6289                 struct intel_super *super;
6290
6291                 if (load_super_imsm_all(st, cfd, (void **) &super, NULL, NULL, 1) == 0) {
6292                         st->sb = super;
6293                         strcpy(st->container_devnm, fd2devnm(cfd));
6294                         close(cfd);
6295                         return validate_geometry_imsm_volume(st, level, layout,
6296                                                              raiddisks, chunk,
6297                                                              size, data_offset, dev,
6298                                                              freesize, 1)
6299                                 ? 1 : -1;
6300                 }
6301         }
6302
6303         if (verbose)
6304                 pr_err("failed container membership check\n");
6305
6306         close(cfd);
6307         return 0;
6308 }
6309
6310 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
6311 {
6312         struct intel_super *super = st->sb;
6313
6314         if (level && *level == UnSet)
6315                 *level = LEVEL_CONTAINER;
6316
6317         if (level && layout && *layout == UnSet)
6318                 *layout = imsm_level_to_layout(*level);
6319
6320         if (chunk && (*chunk == UnSet || *chunk == 0))
6321                 *chunk = imsm_default_chunk(super->orom);
6322 }
6323
6324 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
6325
6326 static int kill_subarray_imsm(struct supertype *st)
6327 {
6328         /* remove the subarray currently referenced by ->current_vol */
6329         __u8 i;
6330         struct intel_dev **dp;
6331         struct intel_super *super = st->sb;
6332         __u8 current_vol = super->current_vol;
6333         struct imsm_super *mpb = super->anchor;
6334
6335         if (super->current_vol < 0)
6336                 return 2;
6337         super->current_vol = -1; /* invalidate subarray cursor */
6338
6339         /* block deletions that would change the uuid of active subarrays
6340          *
6341          * FIXME when immutable ids are available, but note that we'll
6342          * also need to fixup the invalidated/active subarray indexes in
6343          * mdstat
6344          */
6345         for (i = 0; i < mpb->num_raid_devs; i++) {
6346                 char subarray[4];
6347
6348                 if (i < current_vol)
6349                         continue;
6350                 sprintf(subarray, "%u", i);
6351                 if (is_subarray_active(subarray, st->devnm)) {
6352                         pr_err("deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
6353                                current_vol, i);
6354
6355                         return 2;
6356                 }
6357         }
6358
6359         if (st->update_tail) {
6360                 struct imsm_update_kill_array *u = xmalloc(sizeof(*u));
6361
6362                 u->type = update_kill_array;
6363                 u->dev_idx = current_vol;
6364                 append_metadata_update(st, u, sizeof(*u));
6365
6366                 return 0;
6367         }
6368
6369         for (dp = &super->devlist; *dp;)
6370                 if ((*dp)->index == current_vol) {
6371                         *dp = (*dp)->next;
6372                 } else {
6373                         handle_missing(super, (*dp)->dev);
6374                         if ((*dp)->index > current_vol)
6375                                 (*dp)->index--;
6376                         dp = &(*dp)->next;
6377                 }
6378
6379         /* no more raid devices, all active components are now spares,
6380          * but of course failed are still failed
6381          */
6382         if (--mpb->num_raid_devs == 0) {
6383                 struct dl *d;
6384
6385                 for (d = super->disks; d; d = d->next)
6386                         if (d->index > -2)
6387                                 mark_spare(d);
6388         }
6389
6390         super->updates_pending++;
6391
6392         return 0;
6393 }
6394
6395 static int update_subarray_imsm(struct supertype *st, char *subarray,
6396                                 char *update, struct mddev_ident *ident)
6397 {
6398         /* update the subarray currently referenced by ->current_vol */
6399         struct intel_super *super = st->sb;
6400         struct imsm_super *mpb = super->anchor;
6401
6402         if (strcmp(update, "name") == 0) {
6403                 char *name = ident->name;
6404                 char *ep;
6405                 int vol;
6406
6407                 if (is_subarray_active(subarray, st->devnm)) {
6408                         pr_err("Unable to update name of active subarray\n");
6409                         return 2;
6410                 }
6411
6412                 if (!check_name(super, name, 0))
6413                         return 2;
6414
6415                 vol = strtoul(subarray, &ep, 10);
6416                 if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
6417                         return 2;
6418
6419                 if (st->update_tail) {
6420                         struct imsm_update_rename_array *u = xmalloc(sizeof(*u));
6421
6422                         u->type = update_rename_array;
6423                         u->dev_idx = vol;
6424                         snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
6425                         append_metadata_update(st, u, sizeof(*u));
6426                 } else {
6427                         struct imsm_dev *dev;
6428                         int i;
6429
6430                         dev = get_imsm_dev(super, vol);
6431                         snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
6432                         for (i = 0; i < mpb->num_raid_devs; i++) {
6433                                 dev = get_imsm_dev(super, i);
6434                                 handle_missing(super, dev);
6435                         }
6436                         super->updates_pending++;
6437                 }
6438         } else
6439                 return 2;
6440
6441         return 0;
6442 }
6443 #endif /* MDASSEMBLE */
6444
6445 static int is_gen_migration(struct imsm_dev *dev)
6446 {
6447         if (dev == NULL)
6448                 return 0;
6449
6450         if (!dev->vol.migr_state)
6451                 return 0;
6452
6453         if (migr_type(dev) == MIGR_GEN_MIGR)
6454                 return 1;
6455
6456         return 0;
6457 }
6458
6459 static int is_rebuilding(struct imsm_dev *dev)
6460 {
6461         struct imsm_map *migr_map;
6462
6463         if (!dev->vol.migr_state)
6464                 return 0;
6465
6466         if (migr_type(dev) != MIGR_REBUILD)
6467                 return 0;
6468
6469         migr_map = get_imsm_map(dev, MAP_1);
6470
6471         if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
6472                 return 1;
6473         else
6474                 return 0;
6475 }
6476
6477 #ifndef MDASSEMBLE
6478 static int is_initializing(struct imsm_dev *dev)
6479 {
6480         struct imsm_map *migr_map;
6481
6482         if (!dev->vol.migr_state)
6483                 return 0;
6484
6485         if (migr_type(dev) != MIGR_INIT)
6486                 return 0;
6487
6488         migr_map = get_imsm_map(dev, MAP_1);
6489
6490         if (migr_map->map_state == IMSM_T_STATE_UNINITIALIZED)
6491                 return 1;
6492
6493         return 0;
6494 }
6495 #endif
6496
6497 static void update_recovery_start(struct intel_super *super,
6498                                         struct imsm_dev *dev,
6499                                         struct mdinfo *array)
6500 {
6501         struct mdinfo *rebuild = NULL;
6502         struct mdinfo *d;
6503         __u32 units;
6504
6505         if (!is_rebuilding(dev))
6506                 return;
6507
6508         /* Find the rebuild target, but punt on the dual rebuild case */
6509         for (d = array->devs; d; d = d->next)
6510                 if (d->recovery_start == 0) {
6511                         if (rebuild)
6512                                 return;
6513                         rebuild = d;
6514                 }
6515
6516         if (!rebuild) {
6517                 /* (?) none of the disks are marked with
6518                  * IMSM_ORD_REBUILD, so assume they are missing and the
6519                  * disk_ord_tbl was not correctly updated
6520                  */
6521                 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
6522                 return;
6523         }
6524
6525         units = __le32_to_cpu(dev->vol.curr_migr_unit);
6526         rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
6527 }
6528
6529 #ifndef MDASSEMBLE
6530 static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
6531 #endif
6532
6533 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
6534 {
6535         /* Given a container loaded by load_super_imsm_all,
6536          * extract information about all the arrays into
6537          * an mdinfo tree.
6538          * If 'subarray' is given, just extract info about that array.
6539          *
6540          * For each imsm_dev create an mdinfo, fill it in,
6541          *  then look for matching devices in super->disks
6542          *  and create appropriate device mdinfo.
6543          */
6544         struct intel_super *super = st->sb;
6545         struct imsm_super *mpb = super->anchor;
6546         struct mdinfo *rest = NULL;
6547         unsigned int i;
6548         int sb_errors = 0;
6549         struct dl *d;
6550         int spare_disks = 0;
6551
6552         /* do not assemble arrays when not all attributes are supported */
6553         if (imsm_check_attributes(mpb->attributes) == 0) {
6554                 sb_errors = 1;
6555                 pr_err("Unsupported attributes in IMSM metadata."
6556                         "Arrays activation is blocked.\n");
6557         }
6558
6559         /* check for bad blocks */
6560         if (imsm_bbm_log_size(super->anchor)) {
6561                 pr_err("BBM log found in IMSM metadata."
6562                        "Arrays activation is blocked.\n");
6563                 sb_errors = 1;
6564         }
6565
6566
6567         /* count spare devices, not used in maps
6568          */
6569         for (d = super->disks; d; d = d->next)
6570                 if (d->index == -1)
6571                         spare_disks++;
6572
6573         for (i = 0; i < mpb->num_raid_devs; i++) {
6574                 struct imsm_dev *dev;
6575                 struct imsm_map *map;
6576                 struct imsm_map *map2;
6577                 struct mdinfo *this;
6578                 int slot;
6579 #ifndef MDASSEMBLE
6580                 int chunk;
6581 #endif
6582                 char *ep;
6583
6584                 if (subarray &&
6585                     (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
6586                         continue;
6587
6588                 dev = get_imsm_dev(super, i);
6589                 map = get_imsm_map(dev, MAP_0);
6590                 map2 = get_imsm_map(dev, MAP_1);
6591
6592                 /* do not publish arrays that are in the middle of an
6593                  * unsupported migration
6594                  */
6595                 if (dev->vol.migr_state &&
6596                     (migr_type(dev) == MIGR_STATE_CHANGE)) {
6597                         pr_err("cannot assemble volume '%.16s':"
6598                                 " unsupported migration in progress\n",
6599                                 dev->volume);
6600                         continue;
6601                 }
6602                 /* do not publish arrays that are not support by controller's
6603                  * OROM/EFI
6604                  */
6605
6606                 this = xmalloc(sizeof(*this));
6607
6608                 super->current_vol = i;
6609                 getinfo_super_imsm_volume(st, this, NULL);
6610                 this->next = rest;
6611 #ifndef MDASSEMBLE
6612                 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
6613                 /* mdadm does not support all metadata features- set the bit in all arrays state */
6614                 if (!validate_geometry_imsm_orom(super,
6615                                                  get_imsm_raid_level(map), /* RAID level */
6616                                                  imsm_level_to_layout(get_imsm_raid_level(map)),
6617                                                  map->num_members, /* raid disks */
6618                                                  &chunk, join_u32(dev->size_low, dev->size_high),
6619                                                  1 /* verbose */)) {
6620                         pr_err("IMSM RAID geometry validation"
6621                                 " failed.  Array %s activation is blocked.\n",
6622                                 dev->volume);
6623                         this->array.state |=
6624                           (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
6625                           (1<<MD_SB_BLOCK_VOLUME);
6626                 }
6627 #endif
6628
6629                 /* if array has bad blocks, set suitable bit in all arrays state */
6630                 if (sb_errors)
6631                         this->array.state |=
6632                           (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
6633                           (1<<MD_SB_BLOCK_VOLUME);
6634
6635                 for (slot = 0 ; slot <  map->num_members; slot++) {
6636                         unsigned long long recovery_start;
6637                         struct mdinfo *info_d;
6638                         struct dl *d;
6639                         int idx;
6640                         int skip;
6641                         __u32 ord;
6642
6643                         skip = 0;
6644                         idx = get_imsm_disk_idx(dev, slot, MAP_0);
6645                         ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
6646                         for (d = super->disks; d ; d = d->next)
6647                                 if (d->index == idx)
6648                                         break;
6649
6650                         recovery_start = MaxSector;
6651                         if (d == NULL)
6652                                 skip = 1;
6653                         if (d && is_failed(&d->disk))
6654                                 skip = 1;
6655                         if (ord & IMSM_ORD_REBUILD)
6656                                 recovery_start = 0;
6657
6658                         /*
6659                          * if we skip some disks the array will be assmebled degraded;
6660                          * reset resync start to avoid a dirty-degraded
6661                          * situation when performing the intial sync
6662                          *
6663                          * FIXME handle dirty degraded
6664                          */
6665                         if ((skip || recovery_start == 0) && !dev->vol.dirty)
6666                                 this->resync_start = MaxSector;
6667                         if (skip)
6668                                 continue;
6669
6670                         info_d = xcalloc(1, sizeof(*info_d));
6671                         info_d->next = this->devs;
6672                         this->devs = info_d;
6673
6674                         info_d->disk.number = d->index;
6675                         info_d->disk.major = d->major;
6676                         info_d->disk.minor = d->minor;
6677                         info_d->disk.raid_disk = slot;
6678                         info_d->recovery_start = recovery_start;
6679                         if (map2) {
6680                                 if (slot < map2->num_members)
6681                                         info_d->disk.state = (1 << MD_DISK_ACTIVE);
6682                                 else
6683                                         this->array.spare_disks++;
6684                         } else {
6685                                 if (slot < map->num_members)
6686                                         info_d->disk.state = (1 << MD_DISK_ACTIVE);
6687                                 else
6688                                         this->array.spare_disks++;
6689                         }
6690                         if (info_d->recovery_start == MaxSector)
6691                                 this->array.working_disks++;
6692
6693                         info_d->events = __le32_to_cpu(mpb->generation_num);
6694                         info_d->data_offset = pba_of_lba0(map);
6695                         info_d->component_size = blocks_per_member(map);
6696                 }
6697                 /* now that the disk list is up-to-date fixup recovery_start */
6698                 update_recovery_start(super, dev, this);
6699                 this->array.spare_disks += spare_disks;
6700
6701 #ifndef MDASSEMBLE
6702                 /* check for reshape */
6703                 if (this->reshape_active == 1)
6704                         recover_backup_imsm(st, this);
6705 #endif
6706                 rest = this;
6707         }
6708
6709         return rest;
6710 }
6711
6712
6713 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
6714                                 int failed, int look_in_map)
6715 {
6716         struct imsm_map *map;
6717
6718         map = get_imsm_map(dev, look_in_map);
6719
6720         if (!failed)
6721                 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
6722                         IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
6723
6724         switch (get_imsm_raid_level(map)) {
6725         case 0:
6726                 return IMSM_T_STATE_FAILED;
6727                 break;
6728         case 1:
6729                 if (failed < map->num_members)
6730                         return IMSM_T_STATE_DEGRADED;
6731                 else
6732                         return IMSM_T_STATE_FAILED;
6733                 break;
6734         case 10:
6735         {
6736                 /**
6737                  * check to see if any mirrors have failed, otherwise we
6738                  * are degraded.  Even numbered slots are mirrored on
6739                  * slot+1
6740                  */
6741                 int i;
6742                 /* gcc -Os complains that this is unused */
6743                 int insync = insync;
6744
6745                 for (i = 0; i < map->num_members; i++) {
6746                         __u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X);
6747                         int idx = ord_to_idx(ord);
6748                         struct imsm_disk *disk;
6749
6750                         /* reset the potential in-sync count on even-numbered
6751                          * slots.  num_copies is always 2 for imsm raid10
6752                          */
6753                         if ((i & 1) == 0)
6754                                 insync = 2;
6755
6756                         disk = get_imsm_disk(super, idx);
6757                         if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
6758                                 insync--;
6759
6760                         /* no in-sync disks left in this mirror the
6761                          * array has failed
6762                          */
6763                         if (insync == 0)
6764                                 return IMSM_T_STATE_FAILED;
6765                 }
6766
6767                 return IMSM_T_STATE_DEGRADED;
6768         }
6769         case 5:
6770                 if (failed < 2)
6771                         return IMSM_T_STATE_DEGRADED;
6772                 else
6773                         return IMSM_T_STATE_FAILED;
6774                 break;
6775         default:
6776                 break;
6777         }
6778
6779         return map->map_state;
6780 }
6781
6782 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
6783                              int look_in_map)
6784 {
6785         int i;
6786         int failed = 0;
6787         struct imsm_disk *disk;
6788         struct imsm_map *map = get_imsm_map(dev, MAP_0);
6789         struct imsm_map *prev = get_imsm_map(dev, MAP_1);
6790         struct imsm_map *map_for_loop;
6791         __u32 ord;
6792         int idx;
6793         int idx_1;
6794
6795         /* at the beginning of migration we set IMSM_ORD_REBUILD on
6796          * disks that are being rebuilt.  New failures are recorded to
6797          * map[0].  So we look through all the disks we started with and
6798          * see if any failures are still present, or if any new ones
6799          * have arrived
6800          */
6801         map_for_loop = map;
6802         if (prev && (map->num_members < prev->num_members))
6803                 map_for_loop = prev;
6804
6805         for (i = 0; i < map_for_loop->num_members; i++) {
6806                 idx_1 = -255;
6807                 /* when MAP_X is passed both maps failures are counted
6808                  */
6809                 if (prev &&
6810                     ((look_in_map == MAP_1) || (look_in_map == MAP_X)) &&
6811                     (i < prev->num_members)) {
6812                         ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
6813                         idx_1 = ord_to_idx(ord);
6814
6815                         disk = get_imsm_disk(super, idx_1);
6816                         if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
6817                                 failed++;
6818                 }
6819                 if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) &&
6820                     (i < map->num_members)) {
6821                         ord = __le32_to_cpu(map->disk_ord_tbl[i]);
6822                         idx = ord_to_idx(ord);
6823
6824                         if (idx != idx_1) {
6825                                 disk = get_imsm_disk(super, idx);
6826                                 if (!disk || is_failed(disk) ||
6827                                     ord & IMSM_ORD_REBUILD)
6828                                         failed++;
6829                         }
6830                 }
6831         }
6832
6833         return failed;
6834 }
6835
6836 #ifndef MDASSEMBLE
6837 static int imsm_open_new(struct supertype *c, struct active_array *a,
6838                          char *inst)
6839 {
6840         struct intel_super *super = c->sb;
6841         struct imsm_super *mpb = super->anchor;
6842
6843         if (atoi(inst) >= mpb->num_raid_devs) {
6844                 fprintf(stderr, "%s: subarry index %d, out of range\n",
6845                         __func__, atoi(inst));
6846                 return -ENODEV;
6847         }
6848
6849         dprintf("imsm: open_new %s\n", inst);
6850         a->info.container_member = atoi(inst);
6851         return 0;
6852 }
6853
6854 static int is_resyncing(struct imsm_dev *dev)
6855 {
6856         struct imsm_map *migr_map;
6857
6858         if (!dev->vol.migr_state)
6859                 return 0;
6860
6861         if (migr_type(dev) == MIGR_INIT ||
6862             migr_type(dev) == MIGR_REPAIR)
6863                 return 1;
6864
6865         if (migr_type(dev) == MIGR_GEN_MIGR)
6866                 return 0;
6867
6868         migr_map = get_imsm_map(dev, MAP_1);
6869
6870         if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
6871             (dev->vol.migr_type != MIGR_GEN_MIGR))
6872                 return 1;
6873         else
6874                 return 0;
6875 }
6876
6877 /* return true if we recorded new information */
6878 static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
6879 {
6880         __u32 ord;
6881         int slot;
6882         struct imsm_map *map;
6883         char buf[MAX_RAID_SERIAL_LEN+3];
6884         unsigned int len, shift = 0;
6885
6886         /* new failures are always set in map[0] */
6887         map = get_imsm_map(dev, MAP_0);
6888
6889         slot = get_imsm_disk_slot(map, idx);
6890         if (slot < 0)
6891                 return 0;
6892
6893         ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
6894         if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
6895                 return 0;
6896
6897         memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN);
6898         buf[MAX_RAID_SERIAL_LEN] = '\000';
6899         strcat(buf, ":0");
6900         if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN)
6901                 shift = len - MAX_RAID_SERIAL_LEN + 1;
6902         strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN);
6903
6904         disk->status |= FAILED_DISK;
6905         set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
6906         /* mark failures in second map if second map exists and this disk
6907          * in this slot.
6908          * This is valid for migration, initialization and rebuild
6909          */
6910         if (dev->vol.migr_state) {
6911                 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
6912                 int slot2 = get_imsm_disk_slot(map2, idx);
6913
6914                 if ((slot2 < map2->num_members) &&
6915                     (slot2 >= 0))
6916                         set_imsm_ord_tbl_ent(map2, slot2,
6917                                              idx | IMSM_ORD_REBUILD);
6918         }
6919         if (map->failed_disk_num == 0xff)
6920                 map->failed_disk_num = slot;
6921         return 1;
6922 }
6923
6924 static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
6925 {
6926         mark_failure(dev, disk, idx);
6927
6928         if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
6929                 return;
6930
6931         disk->scsi_id = __cpu_to_le32(~(__u32)0);
6932         memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
6933 }
6934
6935 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
6936 {
6937         struct dl *dl;
6938
6939         if (!super->missing)
6940                 return;
6941
6942         /* When orom adds replacement for missing disk it does
6943          * not remove entry of missing disk, but just updates map with
6944          * new added disk. So it is not enough just to test if there is
6945          * any missing disk, we have to look if there are any failed disks
6946          * in map to stop migration */
6947
6948         dprintf("imsm: mark missing\n");
6949         /* end process for initialization and rebuild only
6950          */
6951         if (is_gen_migration(dev) == 0) {
6952                 __u8 map_state;
6953                 int failed;
6954
6955                 failed = imsm_count_failed(super, dev, MAP_0);
6956                 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
6957
6958                 if (failed)
6959                         end_migration(dev, super, map_state);
6960         }
6961         for (dl = super->missing; dl; dl = dl->next)
6962                 mark_missing(dev, &dl->disk, dl->index);
6963         super->updates_pending++;
6964 }
6965
6966 static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
6967                                               long long new_size)
6968 {
6969         int used_disks = imsm_num_data_members(dev, MAP_0);
6970         unsigned long long array_blocks;
6971         struct imsm_map *map;
6972
6973         if (used_disks == 0) {
6974                 /* when problems occures
6975                  * return current array_blocks value
6976                  */
6977                 array_blocks = __le32_to_cpu(dev->size_high);
6978                 array_blocks = array_blocks << 32;
6979                 array_blocks += __le32_to_cpu(dev->size_low);
6980
6981                 return array_blocks;
6982         }
6983
6984         /* set array size in metadata
6985          */
6986         if (new_size <= 0) {
6987                 /* OLCE size change is caused by added disks
6988                  */
6989                 map = get_imsm_map(dev, MAP_0);
6990                 array_blocks = blocks_per_member(map) * used_disks;
6991         } else {
6992                 /* Online Volume Size Change
6993                  * Using  available free space
6994                  */
6995                 array_blocks = new_size;
6996         }
6997
6998         /* round array size down to closest MB
6999          */
7000         array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
7001         dev->size_low = __cpu_to_le32((__u32)array_blocks);
7002         dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
7003
7004         return array_blocks;
7005 }
7006
7007 static void imsm_set_disk(struct active_array *a, int n, int state);
7008
7009 static void imsm_progress_container_reshape(struct intel_super *super)
7010 {
7011         /* if no device has a migr_state, but some device has a
7012          * different number of members than the previous device, start
7013          * changing the number of devices in this device to match
7014          * previous.
7015          */
7016         struct imsm_super *mpb = super->anchor;
7017         int prev_disks = -1;
7018         int i;
7019         int copy_map_size;
7020
7021         for (i = 0; i < mpb->num_raid_devs; i++) {
7022                 struct imsm_dev *dev = get_imsm_dev(super, i);
7023                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
7024                 struct imsm_map *map2;
7025                 int prev_num_members;
7026
7027                 if (dev->vol.migr_state)
7028                         return;
7029
7030                 if (prev_disks == -1)
7031                         prev_disks = map->num_members;
7032                 if (prev_disks == map->num_members)
7033                         continue;
7034
7035                 /* OK, this array needs to enter reshape mode.
7036                  * i.e it needs a migr_state
7037                  */
7038
7039                 copy_map_size = sizeof_imsm_map(map);
7040                 prev_num_members = map->num_members;
7041                 map->num_members = prev_disks;
7042                 dev->vol.migr_state = 1;
7043                 dev->vol.curr_migr_unit = 0;
7044                 set_migr_type(dev, MIGR_GEN_MIGR);
7045                 for (i = prev_num_members;
7046                      i < map->num_members; i++)
7047                         set_imsm_ord_tbl_ent(map, i, i);
7048                 map2 = get_imsm_map(dev, MAP_1);
7049                 /* Copy the current map */
7050                 memcpy(map2, map, copy_map_size);
7051                 map2->num_members = prev_num_members;
7052
7053                 imsm_set_array_size(dev, -1);
7054                 super->clean_migration_record_by_mdmon = 1;
7055                 super->updates_pending++;
7056         }
7057 }
7058
7059 /* Handle dirty -> clean transititions, resync and reshape.  Degraded and rebuild
7060  * states are handled in imsm_set_disk() with one exception, when a
7061  * resync is stopped due to a new failure this routine will set the
7062  * 'degraded' state for the array.
7063  */
7064 static int imsm_set_array_state(struct active_array *a, int consistent)
7065 {
7066         int inst = a->info.container_member;
7067         struct intel_super *super = a->container->sb;
7068         struct imsm_dev *dev = get_imsm_dev(super, inst);
7069         struct imsm_map *map = get_imsm_map(dev, MAP_0);
7070         int failed = imsm_count_failed(super, dev, MAP_0);
7071         __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
7072         __u32 blocks_per_unit;
7073
7074         if (dev->vol.migr_state &&
7075             dev->vol.migr_type  == MIGR_GEN_MIGR) {
7076                 /* array state change is blocked due to reshape action
7077                  * We might need to
7078                  * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
7079                  * - finish the reshape (if last_checkpoint is big and action != reshape)
7080                  * - update curr_migr_unit
7081                  */
7082                 if (a->curr_action == reshape) {
7083                         /* still reshaping, maybe update curr_migr_unit */
7084                         goto mark_checkpoint;
7085                 } else {
7086                         if (a->last_checkpoint == 0 && a->prev_action == reshape) {
7087                                 /* for some reason we aborted the reshape.
7088                                  *
7089                                  * disable automatic metadata rollback
7090                                  * user action is required to recover process
7091                                  */
7092                                 if (0) {
7093                                         struct imsm_map *map2 =
7094                                                 get_imsm_map(dev, MAP_1);
7095                                         dev->vol.migr_state = 0;
7096                                         set_migr_type(dev, 0);
7097                                         dev->vol.curr_migr_unit = 0;
7098                                         memcpy(map, map2,
7099                                                sizeof_imsm_map(map2));
7100                                         super->updates_pending++;
7101                                 }
7102                         }
7103                         if (a->last_checkpoint >= a->info.component_size) {
7104                                 unsigned long long array_blocks;
7105                                 int used_disks;
7106                                 struct mdinfo *mdi;
7107
7108                                 used_disks = imsm_num_data_members(dev, MAP_0);
7109                                 if (used_disks > 0) {
7110                                         array_blocks =
7111                                                 blocks_per_member(map) *
7112                                                 used_disks;
7113                                         /* round array size down to closest MB
7114                                          */
7115                                         array_blocks = (array_blocks
7116                                                         >> SECT_PER_MB_SHIFT)
7117                                                 << SECT_PER_MB_SHIFT;
7118                                         a->info.custom_array_size = array_blocks;
7119                                         /* encourage manager to update array
7120                                          * size
7121                                          */
7122
7123                                         a->check_reshape = 1;
7124                                 }
7125                                 /* finalize online capacity expansion/reshape */
7126                                 for (mdi = a->info.devs; mdi; mdi = mdi->next)
7127                                         imsm_set_disk(a,
7128                                                       mdi->disk.raid_disk,
7129                                                       mdi->curr_state);
7130
7131                                 imsm_progress_container_reshape(super);
7132                         }
7133                 }
7134         }
7135
7136         /* before we activate this array handle any missing disks */
7137         if (consistent == 2)
7138                 handle_missing(super, dev);
7139
7140         if (consistent == 2 &&
7141             (!is_resync_complete(&a->info) ||
7142              map_state != IMSM_T_STATE_NORMAL ||
7143              dev->vol.migr_state))
7144                 consistent = 0;
7145
7146         if (is_resync_complete(&a->info)) {
7147                 /* complete intialization / resync,
7148                  * recovery and interrupted recovery is completed in
7149                  * ->set_disk
7150                  */
7151                 if (is_resyncing(dev)) {
7152                         dprintf("imsm: mark resync done\n");
7153                         end_migration(dev, super, map_state);
7154                         super->updates_pending++;
7155                         a->last_checkpoint = 0;
7156                 }
7157         } else if ((!is_resyncing(dev) && !failed) &&
7158                    (imsm_reshape_blocks_arrays_changes(super) == 0)) {
7159                 /* mark the start of the init process if nothing is failed */
7160                 dprintf("imsm: mark resync start\n");
7161                 if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
7162                         migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
7163                 else
7164                         migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
7165                 super->updates_pending++;
7166         }
7167
7168 mark_checkpoint:
7169         /* skip checkpointing for general migration,
7170          * it is controlled in mdadm
7171          */
7172         if (is_gen_migration(dev))
7173                 goto skip_mark_checkpoint;
7174
7175         /* check if we can update curr_migr_unit from resync_start, recovery_start */
7176         blocks_per_unit = blocks_per_migr_unit(super, dev);
7177         if (blocks_per_unit) {
7178                 __u32 units32;
7179                 __u64 units;
7180
7181                 units = a->last_checkpoint / blocks_per_unit;
7182                 units32 = units;
7183
7184                 /* check that we did not overflow 32-bits, and that
7185                  * curr_migr_unit needs updating
7186                  */
7187                 if (units32 == units &&
7188                     units32 != 0 &&
7189                     __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
7190                         dprintf("imsm: mark checkpoint (%u)\n", units32);
7191                         dev->vol.curr_migr_unit = __cpu_to_le32(units32);
7192                         super->updates_pending++;
7193                 }
7194         }
7195
7196 skip_mark_checkpoint:
7197         /* mark dirty / clean */
7198         if (dev->vol.dirty != !consistent) {
7199                 dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
7200                 if (consistent)
7201                         dev->vol.dirty = 0;
7202                 else
7203                         dev->vol.dirty = 1;
7204                 super->updates_pending++;
7205         }
7206
7207         return consistent;
7208 }
7209
7210 static void imsm_set_disk(struct active_array *a, int n, int state)
7211 {
7212         int inst = a->info.container_member;
7213         struct intel_super *super = a->container->sb;
7214         struct imsm_dev *dev = get_imsm_dev(super, inst);
7215         struct imsm_map *map = get_imsm_map(dev, MAP_0);
7216         struct imsm_disk *disk;
7217         struct mdinfo *mdi;
7218         int recovery_not_finished = 0;
7219         int failed;
7220         __u32 ord;
7221         __u8 map_state;
7222
7223         if (n > map->num_members)
7224                 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
7225                         n, map->num_members - 1);
7226
7227         if (n < 0)
7228                 return;
7229
7230         dprintf("imsm: set_disk %d:%x\n", n, state);
7231
7232         ord = get_imsm_ord_tbl_ent(dev, n, MAP_0);
7233         disk = get_imsm_disk(super, ord_to_idx(ord));
7234
7235         /* check for new failures */
7236         if (state & DS_FAULTY) {
7237                 if (mark_failure(dev, disk, ord_to_idx(ord)))
7238                         super->updates_pending++;
7239         }
7240
7241         /* check if in_sync */
7242         if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
7243                 struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
7244
7245                 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
7246                 super->updates_pending++;
7247         }
7248
7249         failed = imsm_count_failed(super, dev, MAP_0);
7250         map_state = imsm_check_degraded(super, dev, failed, MAP_0);
7251
7252         /* check if recovery complete, newly degraded, or failed */
7253         dprintf("imsm: Detected transition to state ");
7254         switch (map_state) {
7255         case IMSM_T_STATE_NORMAL: /* transition to normal state */
7256                 dprintf("normal: ");
7257                 if (is_rebuilding(dev)) {
7258                         dprintf("while rebuilding");
7259                         /* check if recovery is really finished */
7260                         for (mdi = a->info.devs; mdi ; mdi = mdi->next)
7261                                 if (mdi->recovery_start != MaxSector) {
7262                                         recovery_not_finished = 1;
7263                                         break;
7264                                 }
7265                         if (recovery_not_finished) {
7266                                 dprintf("\nimsm: Rebuild has not finished yet, "
7267                                                 "state not changed");
7268                                 if (a->last_checkpoint < mdi->recovery_start) {
7269                                         a->last_checkpoint = mdi->recovery_start;
7270                                         super->updates_pending++;
7271                                 }
7272                                 break;
7273                         }
7274                         end_migration(dev, super, map_state);
7275                         map = get_imsm_map(dev, MAP_0);
7276                         map->failed_disk_num = ~0;
7277                         super->updates_pending++;
7278                         a->last_checkpoint = 0;
7279                         break;
7280                 }
7281                 if (is_gen_migration(dev)) {
7282                         dprintf("while general migration");
7283                         if (a->last_checkpoint >= a->info.component_size)
7284                                 end_migration(dev, super, map_state);
7285                         else
7286                                 map->map_state = map_state;
7287                         map = get_imsm_map(dev, MAP_0);
7288                         map->failed_disk_num = ~0;
7289                         super->updates_pending++;
7290                         break;
7291                 }
7292         break;
7293         case IMSM_T_STATE_DEGRADED: /* transition to degraded state */
7294                 dprintf("degraded: ");
7295                 if ((map->map_state != map_state) &&
7296                     !dev->vol.migr_state) {
7297                         dprintf("mark degraded");
7298                         map->map_state = map_state;
7299                         super->updates_pending++;
7300                         a->last_checkpoint = 0;
7301                         break;
7302                 }
7303                 if (is_rebuilding(dev)) {
7304                         dprintf("while rebuilding.");
7305                         if (map->map_state != map_state)  {
7306                                 dprintf(" Map state change");
7307                                 end_migration(dev, super, map_state);
7308                                 super->updates_pending++;
7309                         }
7310                         break;
7311                 }
7312                 if (is_gen_migration(dev)) {
7313                         dprintf("while general migration");
7314                         if (a->last_checkpoint >= a->info.component_size)
7315                                 end_migration(dev, super, map_state);
7316                         else {
7317                                 map->map_state = map_state;
7318                                 manage_second_map(super, dev);
7319                         }
7320                         super->updates_pending++;
7321                         break;
7322                 }
7323                 if (is_initializing(dev)) {
7324                         dprintf("while initialization.");
7325                         map->map_state = map_state;
7326                         super->updates_pending++;
7327                         break;
7328                 }
7329         break;
7330         case IMSM_T_STATE_FAILED: /* transition to failed state */
7331                 dprintf("failed: ");
7332                 if (is_gen_migration(dev)) {
7333                         dprintf("while general migration");
7334                         map->map_state = map_state;
7335                         super->updates_pending++;
7336                         break;
7337                 }
7338                 if (map->map_state != map_state) {
7339                         dprintf("mark failed");
7340                         end_migration(dev, super, map_state);
7341                         super->updates_pending++;
7342                         a->last_checkpoint = 0;
7343                         break;
7344                 }
7345         break;
7346         default:
7347                 dprintf("state %i\n", map_state);
7348         }
7349         dprintf("\n");
7350
7351 }
7352
7353 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
7354 {
7355         void *buf = mpb;
7356         __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
7357         unsigned long long dsize;
7358         unsigned long long sectors;
7359
7360         get_dev_size(fd, NULL, &dsize);
7361
7362         if (mpb_size > 512) {
7363                 /* -1 to account for anchor */
7364                 sectors = mpb_sectors(mpb) - 1;
7365
7366                 /* write the extended mpb to the sectors preceeding the anchor */
7367                 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
7368                         return 1;
7369
7370                 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
7371                     != 512 * sectors)
7372                         return 1;
7373         }
7374
7375         /* first block is stored on second to last sector of the disk */
7376         if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
7377                 return 1;
7378
7379         if (write(fd, buf, 512) != 512)
7380                 return 1;
7381
7382         return 0;
7383 }
7384
7385 static void imsm_sync_metadata(struct supertype *container)
7386 {
7387         struct intel_super *super = container->sb;
7388
7389         dprintf("sync metadata: %d\n", super->updates_pending);
7390         if (!super->updates_pending)
7391                 return;
7392
7393         write_super_imsm(container, 0);
7394
7395         super->updates_pending = 0;
7396 }
7397
7398 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
7399 {
7400         struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
7401         int i = get_imsm_disk_idx(dev, idx, MAP_X);
7402         struct dl *dl;
7403
7404         for (dl = super->disks; dl; dl = dl->next)
7405                 if (dl->index == i)
7406                         break;
7407
7408         if (dl && is_failed(&dl->disk))
7409                 dl = NULL;
7410
7411         if (dl)
7412                 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
7413
7414         return dl;
7415 }
7416
7417 static struct dl *imsm_add_spare(struct intel_super *super, int slot,
7418                                  struct active_array *a, int activate_new,
7419                                  struct mdinfo *additional_test_list)
7420 {
7421         struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
7422         int idx = get_imsm_disk_idx(dev, slot, MAP_X);
7423         struct imsm_super *mpb = super->anchor;
7424         struct imsm_map *map;
7425         unsigned long long pos;
7426         struct mdinfo *d;
7427         struct extent *ex;
7428         int i, j;
7429         int found;
7430         __u32 array_start = 0;
7431         __u32 array_end = 0;
7432         struct dl *dl;
7433         struct mdinfo *test_list;
7434
7435         for (dl = super->disks; dl; dl = dl->next) {
7436                 /* If in this array, skip */
7437                 for (d = a->info.devs ; d ; d = d->next)
7438                         if (d->state_fd >= 0 &&
7439                             d->disk.major == dl->major &&
7440                             d->disk.minor == dl->minor) {
7441                                 dprintf("%x:%x already in array\n",
7442                                         dl->major, dl->minor);
7443                                 break;
7444                         }
7445                 if (d)
7446                         continue;
7447                 test_list = additional_test_list;
7448                 while (test_list) {
7449                         if (test_list->disk.major == dl->major &&
7450                             test_list->disk.minor == dl->minor) {
7451                                 dprintf("%x:%x already in additional test list\n",
7452                                         dl->major, dl->minor);
7453                                 break;
7454                         }
7455                         test_list = test_list->next;
7456                 }
7457                 if (test_list)
7458                         continue;
7459
7460                 /* skip in use or failed drives */
7461                 if (is_failed(&dl->disk) || idx == dl->index ||
7462                     dl->index == -2) {
7463                         dprintf("%x:%x status (failed: %d index: %d)\n",
7464                                 dl->major, dl->minor, is_failed(&dl->disk), idx);
7465                         continue;
7466                 }
7467
7468                 /* skip pure spares when we are looking for partially
7469                  * assimilated drives
7470                  */
7471                 if (dl->index == -1 && !activate_new)
7472                         continue;
7473
7474                 /* Does this unused device have the requisite free space?
7475                  * It needs to be able to cover all member volumes
7476                  */
7477                 ex = get_extents(super, dl);
7478                 if (!ex) {
7479                         dprintf("cannot get extents\n");
7480                         continue;
7481                 }
7482                 for (i = 0; i < mpb->num_raid_devs; i++) {
7483                         dev = get_imsm_dev(super, i);
7484                         map = get_imsm_map(dev, MAP_0);
7485
7486                         /* check if this disk is already a member of
7487                          * this array
7488                          */
7489                         if (get_imsm_disk_slot(map, dl->index) >= 0)
7490                                 continue;
7491
7492                         found = 0;
7493                         j = 0;
7494                         pos = 0;
7495                         array_start = pba_of_lba0(map);
7496                         array_end = array_start +
7497                                     blocks_per_member(map) - 1;
7498
7499                         do {
7500                                 /* check that we can start at pba_of_lba0 with
7501                                  * blocks_per_member of space
7502                                  */
7503                                 if (array_start >= pos && array_end < ex[j].start) {
7504                                         found = 1;
7505                                         break;
7506                                 }
7507                                 pos = ex[j].start + ex[j].size;
7508                                 j++;
7509                         } while (ex[j-1].size);
7510
7511                         if (!found)
7512                                 break;
7513                 }
7514
7515                 free(ex);
7516                 if (i < mpb->num_raid_devs) {
7517                         dprintf("%x:%x does not have %u to %u available\n",
7518                                 dl->major, dl->minor, array_start, array_end);
7519                         /* No room */
7520                         continue;
7521                 }
7522                 return dl;
7523         }
7524
7525         return dl;
7526 }
7527
7528
7529 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
7530 {
7531         struct imsm_dev *dev2;
7532         struct imsm_map *map;
7533         struct dl *idisk;
7534         int slot;
7535         int idx;
7536         __u8 state;
7537
7538         dev2 = get_imsm_dev(cont->sb, dev_idx);
7539         if (dev2) {
7540                 state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
7541                 if (state == IMSM_T_STATE_FAILED) {
7542                         map = get_imsm_map(dev2, MAP_0);
7543                         if (!map)
7544                                 return 1;
7545                         for (slot = 0; slot < map->num_members; slot++) {
7546                                 /*
7547                                  * Check if failed disks are deleted from intel
7548                                  * disk list or are marked to be deleted
7549                                  */
7550                                 idx = get_imsm_disk_idx(dev2, slot, MAP_X);
7551                                 idisk = get_imsm_dl_disk(cont->sb, idx);
7552                                 /*
7553                                  * Do not rebuild the array if failed disks
7554                                  * from failed sub-array are not removed from
7555                                  * container.
7556                                  */
7557                                 if (idisk &&
7558                                     is_failed(&idisk->disk) &&
7559                                     (idisk->action != DISK_REMOVE))
7560                                         return 0;
7561                         }
7562                 }
7563         }
7564         return 1;
7565 }
7566
7567 static struct mdinfo *imsm_activate_spare(struct active_array *a,
7568                                           struct metadata_update **updates)
7569 {
7570         /**
7571          * Find a device with unused free space and use it to replace a
7572          * failed/vacant region in an array.  We replace failed regions one a
7573          * array at a time.  The result is that a new spare disk will be added
7574          * to the first failed array and after the monitor has finished
7575          * propagating failures the remainder will be consumed.
7576          *
7577          * FIXME add a capability for mdmon to request spares from another
7578          * container.
7579          */
7580
7581         struct intel_super *super = a->container->sb;
7582         int inst = a->info.container_member;
7583         struct imsm_dev *dev = get_imsm_dev(super, inst);
7584         struct imsm_map *map = get_imsm_map(dev, MAP_0);
7585         int failed = a->info.array.raid_disks;
7586         struct mdinfo *rv = NULL;
7587         struct mdinfo *d;
7588         struct mdinfo *di;
7589         struct metadata_update *mu;
7590         struct dl *dl;
7591         struct imsm_update_activate_spare *u;
7592         int num_spares = 0;
7593         int i;
7594         int allowed;
7595
7596         for (d = a->info.devs ; d ; d = d->next) {
7597                 if ((d->curr_state & DS_FAULTY) &&
7598                         d->state_fd >= 0)
7599                         /* wait for Removal to happen */
7600                         return NULL;
7601                 if (d->state_fd >= 0)
7602                         failed--;
7603         }
7604
7605         dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
7606                 inst, failed, a->info.array.raid_disks, a->info.array.level);
7607
7608         if (imsm_reshape_blocks_arrays_changes(super))
7609                         return NULL;
7610
7611         /* Cannot activate another spare if rebuild is in progress already
7612          */
7613         if (is_rebuilding(dev)) {
7614                 dprintf("imsm: No spare activation allowed. "
7615                         "Rebuild in progress already.\n");
7616                 return NULL;
7617         }
7618
7619         if (a->info.array.level == 4)
7620                 /* No repair for takeovered array
7621                  * imsm doesn't support raid4
7622                  */
7623                 return NULL;
7624
7625         if (imsm_check_degraded(super, dev, failed, MAP_0) !=
7626                         IMSM_T_STATE_DEGRADED)
7627                 return NULL;
7628
7629         /*
7630          * If there are any failed disks check state of the other volume.
7631          * Block rebuild if the another one is failed until failed disks
7632          * are removed from container.
7633          */
7634         if (failed) {
7635                 dprintf("found failed disks in %.*s, check if there another"
7636                         "failed sub-array.\n",
7637                         MAX_RAID_SERIAL_LEN, dev->volume);
7638                 /* check if states of the other volumes allow for rebuild */
7639                 for (i = 0; i <  super->anchor->num_raid_devs; i++) {
7640                         if (i != inst) {
7641                                 allowed = imsm_rebuild_allowed(a->container,
7642                                                                i, failed);
7643                                 if (!allowed)
7644                                         return NULL;
7645                         }
7646                 }
7647         }
7648
7649         /* For each slot, if it is not working, find a spare */
7650         for (i = 0; i < a->info.array.raid_disks; i++) {
7651                 for (d = a->info.devs ; d ; d = d->next)
7652                         if (d->disk.raid_disk == i)
7653                                 break;
7654                 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7655                 if (d && (d->state_fd >= 0))
7656                         continue;
7657
7658                 /*
7659                  * OK, this device needs recovery.  Try to re-add the
7660                  * previous occupant of this slot, if this fails see if
7661                  * we can continue the assimilation of a spare that was
7662                  * partially assimilated, finally try to activate a new
7663                  * spare.
7664                  */
7665                 dl = imsm_readd(super, i, a);
7666                 if (!dl)
7667                         dl = imsm_add_spare(super, i, a, 0, rv);
7668                 if (!dl)
7669                         dl = imsm_add_spare(super, i, a, 1, rv);
7670                 if (!dl)
7671                         continue;
7672
7673                 /* found a usable disk with enough space */
7674                 di = xcalloc(1, sizeof(*di));
7675
7676                 /* dl->index will be -1 in the case we are activating a
7677                  * pristine spare.  imsm_process_update() will create a
7678                  * new index in this case.  Once a disk is found to be
7679                  * failed in all member arrays it is kicked from the
7680                  * metadata
7681                  */
7682                 di->disk.number = dl->index;
7683
7684                 /* (ab)use di->devs to store a pointer to the device
7685                  * we chose
7686                  */
7687                 di->devs = (struct mdinfo *) dl;
7688
7689                 di->disk.raid_disk = i;
7690                 di->disk.major = dl->major;
7691                 di->disk.minor = dl->minor;
7692                 di->disk.state = 0;
7693                 di->recovery_start = 0;
7694                 di->data_offset = pba_of_lba0(map);
7695                 di->component_size = a->info.component_size;
7696                 di->container_member = inst;
7697                 super->random = random32();
7698                 di->next = rv;
7699                 rv = di;
7700                 num_spares++;
7701                 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
7702                         i, di->data_offset);
7703         }
7704
7705         if (!rv)
7706                 /* No spares found */
7707                 return rv;
7708         /* Now 'rv' has a list of devices to return.
7709          * Create a metadata_update record to update the
7710          * disk_ord_tbl for the array
7711          */
7712         mu = xmalloc(sizeof(*mu));
7713         mu->buf = xcalloc(num_spares,
7714                           sizeof(struct imsm_update_activate_spare));
7715         mu->space = NULL;
7716         mu->space_list = NULL;
7717         mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
7718         mu->next = *updates;
7719         u = (struct imsm_update_activate_spare *) mu->buf;
7720
7721         for (di = rv ; di ; di = di->next) {
7722                 u->type = update_activate_spare;
7723                 u->dl = (struct dl *) di->devs;
7724                 di->devs = NULL;
7725                 u->slot = di->disk.raid_disk;
7726                 u->array = inst;
7727                 u->next = u + 1;
7728                 u++;
7729         }
7730         (u-1)->next = NULL;
7731         *updates = mu;
7732
7733         return rv;
7734 }
7735
7736 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
7737 {
7738         struct imsm_dev *dev = get_imsm_dev(super, idx);
7739         struct imsm_map *map = get_imsm_map(dev, MAP_0);
7740         struct imsm_map *new_map = get_imsm_map(&u->dev, MAP_0);
7741         struct disk_info *inf = get_disk_info(u);
7742         struct imsm_disk *disk;
7743         int i;
7744         int j;
7745
7746         for (i = 0; i < map->num_members; i++) {
7747                 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, MAP_X));
7748                 for (j = 0; j < new_map->num_members; j++)
7749                         if (serialcmp(disk->serial, inf[j].serial) == 0)
7750                                 return 1;
7751         }
7752
7753         return 0;
7754 }
7755
7756
7757 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
7758 {
7759         struct dl *dl = NULL;
7760         for (dl = super->disks; dl; dl = dl->next)
7761                 if ((dl->major == major) &&  (dl->minor == minor))
7762                         return dl;
7763         return NULL;
7764 }
7765
7766 static int remove_disk_super(struct intel_super *super, int major, int minor)
7767 {
7768         struct dl *prev = NULL;
7769         struct dl *dl;
7770
7771         prev = NULL;
7772         for (dl = super->disks; dl; dl = dl->next) {
7773                 if ((dl->major == major) && (dl->minor == minor)) {
7774                         /* remove */
7775                         if (prev)
7776                                 prev->next = dl->next;
7777                         else
7778                                 super->disks = dl->next;
7779                         dl->next = NULL;
7780                         __free_imsm_disk(dl);
7781                         dprintf("%s: removed %x:%x\n",
7782                                 __func__, major, minor);
7783                         break;
7784                 }
7785                 prev = dl;
7786         }
7787         return 0;
7788 }
7789
7790 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
7791
7792 static int add_remove_disk_update(struct intel_super *super)
7793 {
7794         int check_degraded = 0;
7795         struct dl *disk = NULL;
7796         /* add/remove some spares to/from the metadata/contrainer */
7797         while (super->disk_mgmt_list) {
7798                 struct dl *disk_cfg;
7799
7800                 disk_cfg = super->disk_mgmt_list;
7801                 super->disk_mgmt_list = disk_cfg->next;
7802                 disk_cfg->next = NULL;
7803
7804                 if (disk_cfg->action == DISK_ADD) {
7805                         disk_cfg->next = super->disks;
7806                         super->disks = disk_cfg;
7807                         check_degraded = 1;
7808                         dprintf("%s: added %x:%x\n",
7809                                 __func__, disk_cfg->major,
7810                                 disk_cfg->minor);
7811                 } else if (disk_cfg->action == DISK_REMOVE) {
7812                         dprintf("Disk remove action processed: %x.%x\n",
7813                                 disk_cfg->major, disk_cfg->minor);
7814                         disk = get_disk_super(super,
7815                                               disk_cfg->major,
7816                                               disk_cfg->minor);
7817                         if (disk) {
7818                                 /* store action status */
7819                                 disk->action = DISK_REMOVE;
7820                                 /* remove spare disks only */
7821                                 if (disk->index == -1) {
7822                                         remove_disk_super(super,
7823                                                           disk_cfg->major,
7824                                                           disk_cfg->minor);
7825                                 }
7826                         }
7827                         /* release allocate disk structure */
7828                         __free_imsm_disk(disk_cfg);
7829                 }
7830         }
7831         return check_degraded;
7832 }
7833
7834
7835 static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
7836                                                 struct intel_super *super,
7837                                                 void ***space_list)
7838 {
7839         struct intel_dev *id;
7840         void **tofree = NULL;
7841         int ret_val = 0;
7842
7843         dprintf("apply_reshape_migration_update()\n");
7844         if ((u->subdev < 0) ||
7845             (u->subdev > 1)) {
7846                 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
7847                 return ret_val;
7848         }
7849         if ((space_list == NULL) || (*space_list == NULL)) {
7850                 dprintf("imsm: Error: Memory is not allocated\n");
7851                 return ret_val;
7852         }
7853
7854         for (id = super->devlist ; id; id = id->next) {
7855                 if (id->index == (unsigned)u->subdev) {
7856                         struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
7857                         struct imsm_map *map;
7858                         struct imsm_dev *new_dev =
7859                                 (struct imsm_dev *)*space_list;
7860                         struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
7861                         int to_state;
7862                         struct dl *new_disk;
7863
7864                         if (new_dev == NULL)
7865                                 return ret_val;
7866                         *space_list = **space_list;
7867                         memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
7868                         map = get_imsm_map(new_dev, MAP_0);
7869                         if (migr_map) {
7870                                 dprintf("imsm: Error: migration in progress");
7871                                 return ret_val;
7872                         }
7873
7874                         to_state = map->map_state;
7875                         if ((u->new_level == 5) && (map->raid_level == 0)) {
7876                                 map->num_members++;
7877                                 /* this should not happen */
7878                                 if (u->new_disks[0] < 0) {
7879                                         map->failed_disk_num =
7880                                                 map->num_members - 1;
7881                                         to_state = IMSM_T_STATE_DEGRADED;
7882                                 } else
7883                                         to_state = IMSM_T_STATE_NORMAL;
7884                         }
7885                         migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
7886                         if (u->new_level > -1)
7887                                 map->raid_level = u->new_level;
7888                         migr_map = get_imsm_map(new_dev, MAP_1);
7889                         if ((u->new_level == 5) &&
7890                             (migr_map->raid_level == 0)) {
7891                                 int ord = map->num_members - 1;
7892                                 migr_map->num_members--;
7893                                 if (u->new_disks[0] < 0)
7894                                         ord |= IMSM_ORD_REBUILD;
7895                                 set_imsm_ord_tbl_ent(map,
7896                                                      map->num_members - 1,
7897                                                      ord);
7898                         }
7899                         id->dev = new_dev;
7900                         tofree = (void **)dev;
7901
7902                         /* update chunk size
7903                          */
7904                         if (u->new_chunksize > 0)
7905                                 map->blocks_per_strip =
7906                                         __cpu_to_le16(u->new_chunksize * 2);
7907
7908                         /* add disk
7909                          */
7910                         if ((u->new_level != 5) ||
7911                             (migr_map->raid_level != 0) ||
7912                             (migr_map->raid_level == map->raid_level))
7913                                 goto skip_disk_add;
7914
7915                         if (u->new_disks[0] >= 0) {
7916                                 /* use passes spare
7917                                  */
7918                                 new_disk = get_disk_super(super,
7919                                                         major(u->new_disks[0]),
7920                                                         minor(u->new_disks[0]));
7921                                 dprintf("imsm: new disk for reshape is: %i:%i "
7922                                         "(%p, index = %i)\n",
7923                                         major(u->new_disks[0]),
7924                                         minor(u->new_disks[0]),
7925                                         new_disk, new_disk->index);
7926                                 if (new_disk == NULL)
7927                                         goto error_disk_add;
7928
7929                                 new_disk->index = map->num_members - 1;
7930                                 /* slot to fill in autolayout
7931                                  */
7932                                 new_disk->raiddisk = new_disk->index;
7933                                 new_disk->disk.status |= CONFIGURED_DISK;
7934                                 new_disk->disk.status &= ~SPARE_DISK;
7935                         } else
7936                                 goto error_disk_add;
7937
7938 skip_disk_add:
7939                         *tofree = *space_list;
7940                         /* calculate new size
7941                          */
7942                         imsm_set_array_size(new_dev, -1);
7943
7944                         ret_val = 1;
7945                 }
7946         }
7947
7948         if (tofree)
7949                 *space_list = tofree;
7950         return ret_val;
7951
7952 error_disk_add:
7953         dprintf("Error: imsm: Cannot find disk.\n");
7954         return ret_val;
7955 }
7956
7957 static int apply_size_change_update(struct imsm_update_size_change *u,
7958                 struct intel_super *super)
7959 {
7960         struct intel_dev *id;
7961         int ret_val = 0;
7962
7963         dprintf("apply_size_change_update()\n");
7964         if ((u->subdev < 0) ||
7965             (u->subdev > 1)) {
7966                 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
7967                 return ret_val;
7968         }
7969
7970         for (id = super->devlist ; id; id = id->next) {
7971                 if (id->index == (unsigned)u->subdev) {
7972                         struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
7973                         struct imsm_map *map = get_imsm_map(dev, MAP_0);
7974                         int used_disks = imsm_num_data_members(dev, MAP_0);
7975                         unsigned long long blocks_per_member;
7976
7977                         /* calculate new size
7978                          */
7979                         blocks_per_member = u->new_size / used_disks;
7980                         dprintf("imsm: apply_size_change_update(size: %llu, "
7981                                 "blocks per member: %llu)\n",
7982                                 u->new_size, blocks_per_member);
7983                         set_blocks_per_member(map, blocks_per_member);
7984                         imsm_set_array_size(dev, u->new_size);
7985
7986                         ret_val = 1;
7987                         break;
7988                 }
7989         }
7990
7991         return ret_val;
7992 }
7993
7994
7995 static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
7996                                        struct intel_super *super,
7997                                        struct active_array *active_array)
7998 {
7999         struct imsm_super *mpb = super->anchor;
8000         struct imsm_dev *dev = get_imsm_dev(super, u->array);
8001         struct imsm_map *map = get_imsm_map(dev, MAP_0);
8002         struct imsm_map *migr_map;
8003         struct active_array *a;
8004         struct imsm_disk *disk;
8005         __u8 to_state;
8006         struct dl *dl;
8007         unsigned int found;
8008         int failed;
8009         int victim;
8010         int i;
8011         int second_map_created = 0;
8012
8013         for (; u; u = u->next) {
8014                 victim = get_imsm_disk_idx(dev, u->slot, MAP_X);
8015
8016                 if (victim < 0)
8017                         return 0;
8018
8019                 for (dl = super->disks; dl; dl = dl->next)
8020                         if (dl == u->dl)
8021                                 break;
8022
8023                 if (!dl) {
8024                         fprintf(stderr, "error: imsm_activate_spare passed "
8025                                 "an unknown disk (index: %d)\n",
8026                                 u->dl->index);
8027                         return 0;
8028                 }
8029
8030                 /* count failures (excluding rebuilds and the victim)
8031                  * to determine map[0] state
8032                  */
8033                 failed = 0;
8034                 for (i = 0; i < map->num_members; i++) {
8035                         if (i == u->slot)
8036                                 continue;
8037                         disk = get_imsm_disk(super,
8038                                              get_imsm_disk_idx(dev, i, MAP_X));
8039                         if (!disk || is_failed(disk))
8040                                 failed++;
8041                 }
8042
8043                 /* adding a pristine spare, assign a new index */
8044                 if (dl->index < 0) {
8045                         dl->index = super->anchor->num_disks;
8046                         super->anchor->num_disks++;
8047                 }
8048                 disk = &dl->disk;
8049                 disk->status |= CONFIGURED_DISK;
8050                 disk->status &= ~SPARE_DISK;
8051
8052                 /* mark rebuild */
8053                 to_state = imsm_check_degraded(super, dev, failed, MAP_0);
8054                 if (!second_map_created) {
8055                         second_map_created = 1;
8056                         map->map_state = IMSM_T_STATE_DEGRADED;
8057                         migrate(dev, super, to_state, MIGR_REBUILD);
8058                 } else
8059                         map->map_state = to_state;
8060                 migr_map = get_imsm_map(dev, MAP_1);
8061                 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
8062                 set_imsm_ord_tbl_ent(migr_map, u->slot,
8063                                      dl->index | IMSM_ORD_REBUILD);
8064
8065                 /* update the family_num to mark a new container
8066                  * generation, being careful to record the existing
8067                  * family_num in orig_family_num to clean up after
8068                  * earlier mdadm versions that neglected to set it.
8069                  */
8070                 if (mpb->orig_family_num == 0)
8071                         mpb->orig_family_num = mpb->family_num;
8072                 mpb->family_num += super->random;
8073
8074                 /* count arrays using the victim in the metadata */
8075                 found = 0;
8076                 for (a = active_array; a ; a = a->next) {
8077                         dev = get_imsm_dev(super, a->info.container_member);
8078                         map = get_imsm_map(dev, MAP_0);
8079
8080                         if (get_imsm_disk_slot(map, victim) >= 0)
8081                                 found++;
8082                 }
8083
8084                 /* delete the victim if it is no longer being
8085                  * utilized anywhere
8086                  */
8087                 if (!found) {
8088                         struct dl **dlp;
8089
8090                         /* We know that 'manager' isn't touching anything,
8091                          * so it is safe to delete
8092                          */
8093                         for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
8094                                 if ((*dlp)->index == victim)
8095                                         break;
8096
8097                         /* victim may be on the missing list */
8098                         if (!*dlp)
8099                                 for (dlp = &super->missing; *dlp;
8100                                      dlp = &(*dlp)->next)
8101                                         if ((*dlp)->index == victim)
8102                                                 break;
8103                         imsm_delete(super, dlp, victim);
8104                 }
8105         }
8106
8107         return 1;
8108 }
8109
8110 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
8111                                                 struct intel_super *super,
8112                                                 void ***space_list)
8113 {
8114         struct dl *new_disk;
8115         struct intel_dev *id;
8116         int i;
8117         int delta_disks = u->new_raid_disks - u->old_raid_disks;
8118         int disk_count = u->old_raid_disks;
8119         void **tofree = NULL;
8120         int devices_to_reshape = 1;
8121         struct imsm_super *mpb = super->anchor;
8122         int ret_val = 0;
8123         unsigned int dev_id;
8124
8125         dprintf("imsm: apply_reshape_container_disks_update()\n");
8126
8127         /* enable spares to use in array */
8128         for (i = 0; i < delta_disks; i++) {
8129                 new_disk = get_disk_super(super,
8130                                           major(u->new_disks[i]),
8131                                           minor(u->new_disks[i]));
8132                 dprintf("imsm: new disk for reshape is: %i:%i "
8133                         "(%p, index = %i)\n",
8134                         major(u->new_disks[i]), minor(u->new_disks[i]),
8135                         new_disk, new_disk->index);
8136                 if ((new_disk == NULL) ||
8137                     ((new_disk->index >= 0) &&
8138                      (new_disk->index < u->old_raid_disks)))
8139                         goto update_reshape_exit;
8140                 new_disk->index = disk_count++;
8141                 /* slot to fill in autolayout
8142                  */
8143                 new_disk->raiddisk = new_disk->index;
8144                 new_disk->disk.status |=
8145                         CONFIGURED_DISK;
8146                 new_disk->disk.status &= ~SPARE_DISK;
8147         }
8148
8149         dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
8150                 mpb->num_raid_devs);
8151         /* manage changes in volume
8152          */
8153         for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
8154                 void **sp = *space_list;
8155                 struct imsm_dev *newdev;
8156                 struct imsm_map *newmap, *oldmap;
8157
8158                 for (id = super->devlist ; id; id = id->next) {
8159                         if (id->index == dev_id)
8160                                 break;
8161                 }
8162                 if (id == NULL)
8163                         break;
8164                 if (!sp)
8165                         continue;
8166                 *space_list = *sp;
8167                 newdev = (void*)sp;
8168                 /* Copy the dev, but not (all of) the map */
8169                 memcpy(newdev, id->dev, sizeof(*newdev));
8170                 oldmap = get_imsm_map(id->dev, MAP_0);
8171                 newmap = get_imsm_map(newdev, MAP_0);
8172                 /* Copy the current map */
8173                 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
8174                 /* update one device only
8175                  */
8176                 if (devices_to_reshape) {
8177                         dprintf("imsm: modifying subdev: %i\n",
8178                                 id->index);
8179                         devices_to_reshape--;
8180                         newdev->vol.migr_state = 1;
8181                         newdev->vol.curr_migr_unit = 0;
8182                         set_migr_type(newdev, MIGR_GEN_MIGR);
8183                         newmap->num_members = u->new_raid_disks;
8184                         for (i = 0; i < delta_disks; i++) {
8185                                 set_imsm_ord_tbl_ent(newmap,
8186                                                      u->old_raid_disks + i,
8187                                                      u->old_raid_disks + i);
8188                         }
8189                         /* New map is correct, now need to save old map
8190                          */
8191                         newmap = get_imsm_map(newdev, MAP_1);
8192                         memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
8193
8194                         imsm_set_array_size(newdev, -1);
8195                 }
8196
8197                 sp = (void **)id->dev;
8198                 id->dev = newdev;
8199                 *sp = tofree;
8200                 tofree = sp;
8201
8202                 /* Clear migration record */
8203                 memset(super->migr_rec, 0, sizeof(struct migr_record));
8204         }
8205         if (tofree)
8206                 *space_list = tofree;
8207         ret_val = 1;
8208
8209 update_reshape_exit:
8210
8211         return ret_val;
8212 }
8213
8214 static int apply_takeover_update(struct imsm_update_takeover *u,
8215                                  struct intel_super *super,
8216                                  void ***space_list)
8217 {
8218         struct imsm_dev *dev = NULL;
8219         struct intel_dev *dv;
8220         struct imsm_dev *dev_new;
8221         struct imsm_map *map;
8222         struct dl *dm, *du;
8223         int i;
8224
8225         for (dv = super->devlist; dv; dv = dv->next)
8226                 if (dv->index == (unsigned int)u->subarray) {
8227                         dev = dv->dev;
8228                         break;
8229                 }
8230
8231         if (dev == NULL)
8232                 return 0;
8233
8234         map = get_imsm_map(dev, MAP_0);
8235
8236         if (u->direction == R10_TO_R0) {
8237                 /* Number of failed disks must be half of initial disk number */
8238                 if (imsm_count_failed(super, dev, MAP_0) !=
8239                                 (map->num_members / 2))
8240                         return 0;
8241
8242                 /* iterate through devices to mark removed disks as spare */
8243                 for (dm = super->disks; dm; dm = dm->next) {
8244                         if (dm->disk.status & FAILED_DISK) {
8245                                 int idx = dm->index;
8246                                 /* update indexes on the disk list */
8247 /* FIXME this loop-with-the-loop looks wrong,  I'm not convinced
8248    the index values will end up being correct.... NB */
8249                                 for (du = super->disks; du; du = du->next)
8250                                         if (du->index > idx)
8251                                                 du->index--;
8252                                 /* mark as spare disk */
8253                                 mark_spare(dm);
8254                         }
8255                 }
8256                 /* update map */
8257                 map->num_members = map->num_members / 2;
8258                 map->map_state = IMSM_T_STATE_NORMAL;
8259                 map->num_domains = 1;
8260                 map->raid_level = 0;
8261                 map->failed_disk_num = -1;
8262         }
8263
8264         if (u->direction == R0_TO_R10) {
8265                 void **space;
8266                 /* update slots in current disk list */
8267                 for (dm = super->disks; dm; dm = dm->next) {
8268                         if (dm->index >= 0)
8269                                 dm->index *= 2;
8270                 }
8271                 /* create new *missing* disks */
8272                 for (i = 0; i < map->num_members; i++) {
8273                         space = *space_list;
8274                         if (!space)
8275                                 continue;
8276                         *space_list = *space;
8277                         du = (void *)space;
8278                         memcpy(du, super->disks, sizeof(*du));
8279                         du->fd = -1;
8280                         du->minor = 0;
8281                         du->major = 0;
8282                         du->index = (i * 2) + 1;
8283                         sprintf((char *)du->disk.serial,
8284                                 " MISSING_%d", du->index);
8285                         sprintf((char *)du->serial,
8286                                 "MISSING_%d", du->index);
8287                         du->next = super->missing;
8288                         super->missing = du;
8289                 }
8290                 /* create new dev and map */
8291                 space = *space_list;
8292                 if (!space)
8293                         return 0;
8294                 *space_list = *space;
8295                 dev_new = (void *)space;
8296                 memcpy(dev_new, dev, sizeof(*dev));
8297                 /* update new map */
8298                 map = get_imsm_map(dev_new, MAP_0);
8299                 map->num_members = map->num_members * 2;
8300                 map->map_state = IMSM_T_STATE_DEGRADED;
8301                 map->num_domains = 2;
8302                 map->raid_level = 1;
8303                 /* replace dev<->dev_new */
8304                 dv->dev = dev_new;
8305         }
8306         /* update disk order table */
8307         for (du = super->disks; du; du = du->next)
8308                 if (du->index >= 0)
8309                         set_imsm_ord_tbl_ent(map, du->index, du->index);
8310         for (du = super->missing; du; du = du->next)
8311                 if (du->index >= 0) {
8312                         set_imsm_ord_tbl_ent(map, du->index, du->index);
8313                         mark_missing(dv->dev, &du->disk, du->index);
8314                 }
8315
8316         return 1;
8317 }
8318
8319 static void imsm_process_update(struct supertype *st,
8320                                 struct metadata_update *update)
8321 {
8322         /**
8323          * crack open the metadata_update envelope to find the update record
8324          * update can be one of:
8325          *    update_reshape_container_disks - all the arrays in the container
8326          *      are being reshaped to have more devices.  We need to mark
8327          *      the arrays for general migration and convert selected spares
8328          *      into active devices.
8329          *    update_activate_spare - a spare device has replaced a failed
8330          *      device in an array, update the disk_ord_tbl.  If this disk is
8331          *      present in all member arrays then also clear the SPARE_DISK
8332          *      flag
8333          *    update_create_array
8334          *    update_kill_array
8335          *    update_rename_array
8336          *    update_add_remove_disk
8337          */
8338         struct intel_super *super = st->sb;
8339         struct imsm_super *mpb;
8340         enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
8341
8342         /* update requires a larger buf but the allocation failed */
8343         if (super->next_len && !super->next_buf) {
8344                 super->next_len = 0;
8345                 return;
8346         }
8347
8348         if (super->next_buf) {
8349                 memcpy(super->next_buf, super->buf, super->len);
8350                 free(super->buf);
8351                 super->len = super->next_len;
8352                 super->buf = super->next_buf;
8353
8354                 super->next_len = 0;
8355                 super->next_buf = NULL;
8356         }
8357
8358         mpb = super->anchor;
8359
8360         switch (type) {
8361         case update_general_migration_checkpoint: {
8362                 struct intel_dev *id;
8363                 struct imsm_update_general_migration_checkpoint *u =
8364                                                         (void *)update->buf;
8365
8366                 dprintf("imsm: process_update() "
8367                         "for update_general_migration_checkpoint called\n");
8368
8369                 /* find device under general migration */
8370                 for (id = super->devlist ; id; id = id->next) {
8371                         if (is_gen_migration(id->dev)) {
8372                                 id->dev->vol.curr_migr_unit =
8373                                         __cpu_to_le32(u->curr_migr_unit);
8374                                 super->updates_pending++;
8375                         }
8376                 }
8377                 break;
8378         }
8379         case update_takeover: {
8380                 struct imsm_update_takeover *u = (void *)update->buf;
8381                 if (apply_takeover_update(u, super, &update->space_list)) {
8382                         imsm_update_version_info(super);
8383                         super->updates_pending++;
8384                 }
8385                 break;
8386         }
8387
8388         case update_reshape_container_disks: {
8389                 struct imsm_update_reshape *u = (void *)update->buf;
8390                 if (apply_reshape_container_disks_update(
8391                             u, super, &update->space_list))
8392                         super->updates_pending++;
8393                 break;
8394         }
8395         case update_reshape_migration: {
8396                 struct imsm_update_reshape_migration *u = (void *)update->buf;
8397                 if (apply_reshape_migration_update(
8398                             u, super, &update->space_list))
8399                         super->updates_pending++;
8400                 break;
8401         }
8402         case update_size_change: {
8403                 struct imsm_update_size_change *u = (void *)update->buf;
8404                 if (apply_size_change_update(u, super))
8405                         super->updates_pending++;
8406                 break;
8407         }
8408         case update_activate_spare: {
8409                 struct imsm_update_activate_spare *u = (void *) update->buf;
8410                 if (apply_update_activate_spare(u, super, st->arrays))
8411                         super->updates_pending++;
8412                 break;
8413         }
8414         case update_create_array: {
8415                 /* someone wants to create a new array, we need to be aware of
8416                  * a few races/collisions:
8417                  * 1/ 'Create' called by two separate instances of mdadm
8418                  * 2/ 'Create' versus 'activate_spare': mdadm has chosen
8419                  *     devices that have since been assimilated via
8420                  *     activate_spare.
8421                  * In the event this update can not be carried out mdadm will
8422                  * (FIX ME) notice that its update did not take hold.
8423                  */
8424                 struct imsm_update_create_array *u = (void *) update->buf;
8425                 struct intel_dev *dv;
8426                 struct imsm_dev *dev;
8427                 struct imsm_map *map, *new_map;
8428                 unsigned long long start, end;
8429                 unsigned long long new_start, new_end;
8430                 int i;
8431                 struct disk_info *inf;
8432                 struct dl *dl;
8433
8434                 /* handle racing creates: first come first serve */
8435                 if (u->dev_idx < mpb->num_raid_devs) {
8436                         dprintf("%s: subarray %d already defined\n",
8437                                 __func__, u->dev_idx);
8438                         goto create_error;
8439                 }
8440
8441                 /* check update is next in sequence */
8442                 if (u->dev_idx != mpb->num_raid_devs) {
8443                         dprintf("%s: can not create array %d expected index %d\n",
8444                                 __func__, u->dev_idx, mpb->num_raid_devs);
8445                         goto create_error;
8446                 }
8447
8448                 new_map = get_imsm_map(&u->dev, MAP_0);
8449                 new_start = pba_of_lba0(new_map);
8450                 new_end = new_start + blocks_per_member(new_map);
8451                 inf = get_disk_info(u);
8452
8453                 /* handle activate_spare versus create race:
8454                  * check to make sure that overlapping arrays do not include
8455                  * overalpping disks
8456                  */
8457                 for (i = 0; i < mpb->num_raid_devs; i++) {
8458                         dev = get_imsm_dev(super, i);
8459                         map = get_imsm_map(dev, MAP_0);
8460                         start = pba_of_lba0(map);
8461                         end = start + blocks_per_member(map);
8462                         if ((new_start >= start && new_start <= end) ||
8463                             (start >= new_start && start <= new_end))
8464                                 /* overlap */;
8465                         else
8466                                 continue;
8467
8468                         if (disks_overlap(super, i, u)) {
8469                                 dprintf("%s: arrays overlap\n", __func__);
8470                                 goto create_error;
8471                         }
8472                 }
8473
8474                 /* check that prepare update was successful */
8475                 if (!update->space) {
8476                         dprintf("%s: prepare update failed\n", __func__);
8477                         goto create_error;
8478                 }
8479
8480                 /* check that all disks are still active before committing
8481                  * changes.  FIXME: could we instead handle this by creating a
8482                  * degraded array?  That's probably not what the user expects,
8483                  * so better to drop this update on the floor.
8484                  */
8485                 for (i = 0; i < new_map->num_members; i++) {
8486                         dl = serial_to_dl(inf[i].serial, super);
8487                         if (!dl) {
8488                                 dprintf("%s: disk disappeared\n", __func__);
8489                                 goto create_error;
8490                         }
8491                 }
8492
8493                 super->updates_pending++;
8494
8495                 /* convert spares to members and fixup ord_tbl */
8496                 for (i = 0; i < new_map->num_members; i++) {
8497                         dl = serial_to_dl(inf[i].serial, super);
8498                         if (dl->index == -1) {
8499                                 dl->index = mpb->num_disks;
8500                                 mpb->num_disks++;
8501                                 dl->disk.status |= CONFIGURED_DISK;
8502                                 dl->disk.status &= ~SPARE_DISK;
8503                         }
8504                         set_imsm_ord_tbl_ent(new_map, i, dl->index);
8505                 }
8506
8507                 dv = update->space;
8508                 dev = dv->dev;
8509                 update->space = NULL;
8510                 imsm_copy_dev(dev, &u->dev);
8511                 dv->index = u->dev_idx;
8512                 dv->next = super->devlist;
8513                 super->devlist = dv;
8514                 mpb->num_raid_devs++;
8515
8516                 imsm_update_version_info(super);
8517                 break;
8518  create_error:
8519                 /* mdmon knows how to release update->space, but not
8520                  * ((struct intel_dev *) update->space)->dev
8521                  */
8522                 if (update->space) {
8523                         dv = update->space;
8524                         free(dv->dev);
8525                 }
8526                 break;
8527         }
8528         case update_kill_array: {
8529                 struct imsm_update_kill_array *u = (void *) update->buf;
8530                 int victim = u->dev_idx;
8531                 struct active_array *a;
8532                 struct intel_dev **dp;
8533                 struct imsm_dev *dev;
8534
8535                 /* sanity check that we are not affecting the uuid of
8536                  * active arrays, or deleting an active array
8537                  *
8538                  * FIXME when immutable ids are available, but note that
8539                  * we'll also need to fixup the invalidated/active
8540                  * subarray indexes in mdstat
8541                  */
8542                 for (a = st->arrays; a; a = a->next)
8543                         if (a->info.container_member >= victim)
8544                                 break;
8545                 /* by definition if mdmon is running at least one array
8546                  * is active in the container, so checking
8547                  * mpb->num_raid_devs is just extra paranoia
8548                  */
8549                 dev = get_imsm_dev(super, victim);
8550                 if (a || !dev || mpb->num_raid_devs == 1) {
8551                         dprintf("failed to delete subarray-%d\n", victim);
8552                         break;
8553                 }
8554
8555                 for (dp = &super->devlist; *dp;)
8556                         if ((*dp)->index == (unsigned)super->current_vol) {
8557                                 *dp = (*dp)->next;
8558                         } else {
8559                                 if ((*dp)->index > (unsigned)victim)
8560                                         (*dp)->index--;
8561                                 dp = &(*dp)->next;
8562                         }
8563                 mpb->num_raid_devs--;
8564                 super->updates_pending++;
8565                 break;
8566         }
8567         case update_rename_array: {
8568                 struct imsm_update_rename_array *u = (void *) update->buf;
8569                 char name[MAX_RAID_SERIAL_LEN+1];
8570                 int target = u->dev_idx;
8571                 struct active_array *a;
8572                 struct imsm_dev *dev;
8573
8574                 /* sanity check that we are not affecting the uuid of
8575                  * an active array
8576                  */
8577                 snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
8578                 name[MAX_RAID_SERIAL_LEN] = '\0';
8579                 for (a = st->arrays; a; a = a->next)
8580                         if (a->info.container_member == target)
8581                                 break;
8582                 dev = get_imsm_dev(super, u->dev_idx);
8583                 if (a || !dev || !check_name(super, name, 1)) {
8584                         dprintf("failed to rename subarray-%d\n", target);
8585                         break;
8586                 }
8587
8588                 snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
8589                 super->updates_pending++;
8590                 break;
8591         }
8592         case update_add_remove_disk: {
8593                 /* we may be able to repair some arrays if disks are
8594                  * being added, check teh status of add_remove_disk
8595                  * if discs has been added.
8596                  */
8597                 if (add_remove_disk_update(super)) {
8598                         struct active_array *a;
8599
8600                         super->updates_pending++;
8601                         for (a = st->arrays; a; a = a->next)
8602                                 a->check_degraded = 1;
8603                 }
8604                 break;
8605         }
8606         default:
8607                 fprintf(stderr, "error: unsuported process update type:"
8608                         "(type: %d)\n", type);
8609         }
8610 }
8611
8612 static struct mdinfo *get_spares_for_grow(struct supertype *st);
8613
8614 /* Append one node of 'size' bytes to a prepare-time space list.  The list is
8615  * threaded through the first pointer-sized word of each node: 'tail' is the
8616  * link to fill in, and the new node's own (NULL) link is returned so that
8617  * the caller can keep appending.
8618  */
8619 static void **imsm_space_list_append(void **tail, size_t size)
8620 {
8621         void **node = xmalloc(size);
8622
8623         *tail = node;
8624         *node = NULL;
8625         return node;
8626 }
8627
8628 /**
8629  * Allocate space to hold new disk entries, raid-device entries or a new
8630  * mpb if necessary.  The manager synchronously waits for updates to
8631  * complete in the monitor, so new mpb buffers allocated here can be
8632  * integrated by the monitor thread without worrying about live pointers
8633  * in the manager thread.
8634  *
8635  * Per-update buffers are chained on update->space_list (update->space for
8636  * update_create_array); a larger anchor buffer, when one is needed, is
8637  * left in super->next_buf / super->next_len.
8638  */
8639 static void imsm_prepare_update(struct supertype *st,
8640                                 struct metadata_update *update)
8641 {
8642         enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
8643         struct intel_super *super = st->sb;
8644         struct imsm_super *mpb = super->anchor;
8645         size_t buf_len;
8646         size_t len = 0; /* extra anchor bytes this update may require */
8647
8648         switch (type) {
8649         case update_general_migration_checkpoint:
8650                 dprintf("imsm: prepare_update() "
8651                         "for update_general_migration_checkpoint called\n");
8652                 break;
8653         case update_takeover: {
8654                 struct imsm_update_takeover *u = (void *)update->buf;
8655                 void **tail = (void **)&update->space_list;
8656                 struct imsm_dev *dev;
8657                 int num_members, size, i;
8658
8659                 if (u->direction != R0_TO_R10)
8660                         break;
8661                 /* get_imsm_dev() can return NULL (other callers in this
8662                  * file test for it): allocate nothing for such a subarray
8663                  * instead of dereferencing it
8664                  */
8665                 dev = get_imsm_dev(super, u->subarray);
8666                 if (!dev)
8667                         break;
8668                 num_members = get_imsm_map(dev, MAP_0)->num_members;
8669                 /* allocate memory for added disks */
8670                 for (i = 0; i < num_members; i++)
8671                         tail = imsm_space_list_append(tail, sizeof(struct dl));
8672                 /* allocate memory for new device
8673                  * NOTE(review): sized from the first devlist entry rather
8674                  * than from 'dev' - confirm that this is intended
8675                  */
8676                 size = sizeof_imsm_dev(super->devlist->dev, 0) +
8677                         (num_members * sizeof(__u32));
8678                 imsm_space_list_append(tail, size);
8679                 len = disks_to_mpb_size(num_members * 2);
8680                 break;
8681         }
8682         case update_reshape_container_disks: {
8683                 /* Every raid device in the container is about to
8684                  * gain some more devices, and we will enter a
8685                  * reconfiguration.
8686                  * So each 'imsm_map' will be bigger, and the imsm_vol
8687                  * will now hold 2 of them.
8688                  * Thus we need new 'struct imsm_dev' allocations sized
8689                  * as sizeof_imsm_dev but with more devices in both maps.
8690                  */
8691                 struct imsm_update_reshape *u = (void *)update->buf;
8692                 struct intel_dev *dl;
8693                 void **space_tail = (void**)&update->space_list;
8694
8695                 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
8696
8697                 for (dl = super->devlist; dl; dl = dl->next) {
8698                         int size = sizeof_imsm_dev(dl->dev, 1);
8699
8700                         if (u->new_raid_disks > u->old_raid_disks)
8701                                 size += sizeof(__u32)*2*
8702                                         (u->new_raid_disks - u->old_raid_disks);
8703                         space_tail = imsm_space_list_append(space_tail, size);
8704                 }
8705
8706                 len = disks_to_mpb_size(u->new_raid_disks);
8707                 dprintf("New anchor length is %llu\n", (unsigned long long)len);
8708                 break;
8709         }
8710         case update_reshape_migration: {
8711                 /* for migration level 0->5 we need to add disks
8712                  * so the same as for container operation we will copy
8713                  * device to the bigger location.
8714                  * in memory prepared device and new disk area are prepared
8715                  * for usage in process update
8716                  */
8717                 struct imsm_update_reshape_migration *u = (void *)update->buf;
8718                 struct intel_dev *id;
8719                 void **space_tail = (void **)&update->space_list;
8720                 int current_level = -1;
8721
8722                 dprintf("imsm: imsm_prepare_update() for update_reshape\n");
8723
8724                 /* add space for bigger array in update */
8725                 for (id = super->devlist; id; id = id->next) {
8726                         int size;
8727
8728                         if (id->index != (unsigned)u->subdev)
8729                                 continue;
8730                         size = sizeof_imsm_dev(id->dev, 1);
8731                         if (u->new_raid_disks > u->old_raid_disks)
8732                                 size += sizeof(__u32)*2*
8733                                         (u->new_raid_disks - u->old_raid_disks);
8734                         space_tail = imsm_space_list_append(space_tail, size);
8735                         break;
8736                 }
8737                 /* nothing was queued above (no matching subarray) */
8738                 if (update->space_list == NULL)
8739                         break;
8740
8741                 /* add space for disk in update */
8742                 imsm_space_list_append(space_tail, sizeof(struct dl));
8743
8744                 /* find the current raid level of the subarray */
8745                 for (id = super->devlist ; id; id = id->next)
8746                         if (id->index == (unsigned)u->subdev) {
8747                                 struct imsm_dev *dev;
8748                                 struct imsm_map *map;
8749
8750                                 dev = get_imsm_dev(super, u->subdev);
8751                                 map = get_imsm_map(dev, MAP_0);
8752                                 current_level = map->raid_level;
8753                                 break;
8754                         }
8755                 /* add spare device to update */
8756                 if ((u->new_level == 5) && (u->new_level != current_level)) {
8757                         struct mdinfo *spares = get_spares_for_grow(st);
8758
8759                         if (spares) {
8760                                 struct mdinfo *dev = spares->devs;
8761
8762                                 if (dev) {
8763                                         struct dl *dl;
8764
8765                                         u->new_disks[0] =
8766                                                 makedev(dev->disk.major,
8767                                                         dev->disk.minor);
8768                                         dl = get_disk_super(super,
8769                                                             dev->disk.major,
8770                                                             dev->disk.minor);
8771                                         /* defensive: a failed lookup must
8772                                          * not be dereferenced
8773                                          */
8774                                         if (dl)
8775                                                 dl->index = u->old_raid_disks;
8776                                 }
8777                                 sysfs_free(spares);
8778                         }
8779                 }
8780                 len = disks_to_mpb_size(u->new_raid_disks);
8781                 dprintf("New anchor length is %llu\n", (unsigned long long)len);
8782                 break;
8783         }
8784         case update_size_change:
8785                 break;
8786         case update_create_array: {
8787                 struct imsm_update_create_array *u = (void *) update->buf;
8788                 struct intel_dev *dv;
8789                 struct imsm_dev *dev = &u->dev;
8790                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
8791                 struct disk_info *inf = get_disk_info(u);
8792                 int i;
8793                 int activate = 0;
8794
8795                 len = sizeof_imsm_dev(dev, 1);
8796                 /* allocate a new super->devlist entry */
8797                 dv = xmalloc(sizeof(*dv));
8798                 dv->dev = xmalloc(len);
8799                 update->space = dv;
8800
8801                 /* count how many spares will be converted to members */
8802                 for (i = 0; i < map->num_members; i++) {
8803                         struct dl *dl = serial_to_dl(inf[i].serial, super);
8804
8805                         /* hmm maybe a missing disk failed?, nothing we can
8806                          * do about it here
8807                          */
8808                         if (dl && count_memberships(dl, super) == 0)
8809                                 activate++;
8810                 }
8811                 len += activate * sizeof(struct imsm_disk);
8812                 break;
8813         }
8814         default:
8815                 break;
8816         }
8817
8818         /* check if we need a larger metadata buffer */
8819         buf_len = super->next_buf ? super->next_len : super->len;
8820
8821         if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
8822                 /* ok we need a larger buf than what is currently allocated
8823                  * if this allocation fails process_update will notice that
8824                  * ->next_len is set and ->next_buf is NULL
8825                  */
8826                 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
8827                 free(super->next_buf);  /* free(NULL) is a no-op */
8828
8829                 super->next_len = buf_len;
8830                 if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
8831                         memset(super->next_buf, 0, buf_len);
8832                 else
8833                         super->next_buf = NULL;
8834         }
8835 }
8836
8837 /* Remove disk slot 'index' from the metadata: every disk index and ord-table
8838  * entry above it moves down by one, mpb->num_disks shrinks and *dlp (if any)
8839  * is unlinked and freed.  Must be called while manager is quiesced.
8840  */
8841 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
8842 {
8843         struct imsm_super *mpb = super->anchor;
8844         struct dl *disk;
8845         int vol;
8846
8847         dprintf("%s: deleting device[%d] from imsm_super\n", __func__, index);
8848
8849         /* shift all indexes down one */
8850         for (disk = super->disks; disk; disk = disk->next)
8851                 if (disk->index > (int)index)
8852                         disk->index--;
8853         for (disk = super->missing; disk; disk = disk->next)
8854                 if (disk->index > (int)index)
8855                         disk->index--;
8856
8857         for (vol = 0; vol < mpb->num_raid_devs; vol++) {
8858                 struct imsm_dev *dev = get_imsm_dev(super, vol);
8859                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
8860                 int members = map->num_members;
8861                 int slot;
8862
8863                 for (slot = 0; slot < members; slot++) {
8864                         __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
8865
8866                         if (ord_to_idx(ord) > index) {
8867                                 /* rewrite both maps, being careful not to
8868                                  * propagate ord-flags to the first map
8869                                  */
8870                                 map = get_imsm_map(dev, MAP_0);
8871                                 set_imsm_ord_tbl_ent(map, slot,
8872                                                      ord_to_idx(ord - 1));
8873                                 map = get_imsm_map(dev, MAP_1);
8874                                 if (map)
8875                                         set_imsm_ord_tbl_ent(map, slot, ord - 1);
8876                         }
8877                 }
8878         }
8879
8880         mpb->num_disks--;
8881         super->updates_pending++;
8882         if (*dlp) {
8883                 struct dl *gone = *dlp;
8884                 *dlp = gone->next;
8885                 __free_imsm_disk(gone);
8886         }
8887 }
8888 #endif /* MDASSEMBLE */
8889
8890 /* Close every fd >= 0 in targets[] and reset its slot to -1; NULL is a no-op */
8891 static void close_targets(int *targets, int new_disks)
8892 {
8893         int slot = 0;
8894
8895         while (targets && slot < new_disks) {
8896                 int *fd = &targets[slot++];
8897
8898                 if (*fd < 0)
8899                         continue;
8900                 close(*fd);
8901                 *fd = -1;
8902         }
8903 }
8904
8905 /* Return non-zero when, for some i in [0, pairs), get_imsm_disk() finds
8906  * neither disk index i nor disk index i + 1.
8907  */
8908 static int imsm_disk_pair_missing(struct intel_super *super, int pairs)
8909 {
8910         int i;
8911
8912         for (i = 0; i < pairs; i++) {
8913                 int lost = (get_imsm_disk(super, i) == NULL);
8914
8915                 lost += (get_imsm_disk(super, i + 1) == NULL);
8916                 if (lost == 2)
8917                         return 1;
8918         }
8919         return 0;
8920 }
8921
8922 /* How many members an array of raid 'level' may be missing: raid_disks/2 for
8923  * levels 1 and 10 (0 once a checked disk pair is completely gone), 1 for
8924  * level 5, 2 for level 6 and 0 for every other level.
8925  */
8926 static int imsm_get_allowed_degradation(int level, int raid_disks,
8927                                         struct intel_super *super,
8928                                         struct imsm_dev *dev)
8929 {
8930         switch (level) {
8931         case 1:
8932         case 10: {
8933                 int pairs = raid_disks / 2;
8934
8935                 /* NOTE(review): the pair test uses disk indexes (i, i + 1)
8936                  * and never reads either map, so the second pass repeats
8937                  * the first; kept until the intended pairing is confirmed
8938                  */
8939                 if (imsm_disk_pair_missing(super, pairs))
8940                         return 0;
8941                 /* if there is no second map result can be returned */
8942                 if (get_imsm_map(dev, MAP_1) == NULL)
8943                         return pairs;
8944                 /* check degradation in second map */
8945                 if (imsm_disk_pair_missing(super, pairs))
8946                         return 0;
8947                 return pairs;
8948         }
8949         case 5:
8950                 return 1;
8951         case 6:
8952                 return 2;
8953         default:
8954                 return 0;
8955         }
8956 }
8957
8958
8959 /*******************************************************************************
8960  * Function:    open_backup_targets
8961  * Description: Opens O_RDWR every non-faulty device of info->devs whose
8962  *              raid_disk lies in [0, raid_disks) and stores the descriptor
8963  *              in that slot of raid_fds; slots not opened stay negative
8964  * Parameters:
8965  *      info            : general array info
8966  *      raid_disks      : number of disks
8967  *      raid_fds        : table of device's file descriptors
8968  *      super           : intel super for raid10 degradation check
8969  *      dev             : intel device for raid10 degradation check
8970  * Returns:
8971  *       0 : success
8972  *      -2 : fail, too few disks could be opened (raid_fds is closed again)
8973  ******************************************************************************/
8974 int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
8975                         struct intel_super *super, struct imsm_dev *dev)
8976 {
8977         struct mdinfo *member;
8978         int slot;
8979         int usable = 0;
8980         int allowed;
8981
8982         for (slot = 0; slot < raid_disks; slot++)
8983                 raid_fds[slot] = -1;
8984
8985         for (member = info->devs; member; member = member->next) {
8986                 int fd;
8987
8988                 if (member->disk.state & (1<<MD_DISK_FAULTY)) {
8989                         dprintf("disk is faulty!!\n");
8990                         continue;
8991                 }
8992                 slot = member->disk.raid_disk;
8993                 if (slot < 0 || slot >= raid_disks)
8994                         continue;
8995
8996                 fd = dev_open(map_dev(member->disk.major,
8997                                       member->disk.minor, 1), O_RDWR);
8998                 raid_fds[slot] = fd;
8999                 if (fd < 0) {
9000                         fprintf(stderr, "cannot open component\n");
9001                         continue;
9002                 }
9003                 usable++;
9004         }
9005
9006         /* check if maximum array degradation level is not exceeded */
9007         allowed = imsm_get_allowed_degradation(info->new_level, raid_disks,
9008                                                super, dev);
9009         if (raid_disks - usable > allowed) {
9010                 fprintf(stderr, "Not enough disks can be opened.\n");
9011                 close_targets(raid_fds, raid_disks);
9012                 return -2;
9013         }
9014         return 0;
9015 }
9016
9017 #ifndef MDASSEMBLE
9018 /*******************************************************************************
9019  * Function:    init_migr_record_imsm
9020  * Description: Function inits imsm migration record
9021  * Parameters:
9022  *      st      : supertype; st->sb is the intel super owning the record
9023  *      dev     : device under migration
9024  *      info    : general array info to find the smallest device
9025  * Returns:
9026  *      none
9027  ******************************************************************************/
9028 void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
9029                            struct mdinfo *info)
9030 {
9031         struct intel_super *super = st->sb;
9032         struct migr_record *rec = super->migr_rec;
9033         struct imsm_map *dest = get_imsm_map(dev, MAP_0);
9034         struct imsm_map *src = get_imsm_map(dev, MAP_1);
9035         unsigned long long smallest = -1LLU;
9036         unsigned long long bytes, sectors;
9037         unsigned long long array_blocks, units;
9038         struct mdinfo *member;
9039         __u32 depth, unit_blocks;
9040         int data_disks;
9041
9042         memset(rec, 0, sizeof(struct migr_record));
9043         rec->family_num = __cpu_to_le32(super->anchor->family_num);
9044
9045         /* only ascending reshape supported now */
9046         rec->ascending_migr = __cpu_to_le32(1);
9047
9048         /* unit depth: GEN_MIGR_AREA_SIZE rounded down to a multiple of the
9049          * larger of the two strip sizes; the record fields are written
9050          * once, already converted, instead of being used as scratch
9051          */
9052         depth = GEN_MIGR_AREA_SIZE /
9053                 max(dest->blocks_per_strip, src->blocks_per_strip);
9054         depth *= max(dest->blocks_per_strip, src->blocks_per_strip);
9055         data_disks = imsm_num_data_members(dev, MAP_0);
9056         unit_blocks = depth * data_disks;
9057         rec->blocks_per_unit = __cpu_to_le32(unit_blocks);
9058         rec->dest_depth_per_unit = __cpu_to_le32(depth);
9059
9060         /* number of migration units, rounded up */
9061         array_blocks = info->component_size * data_disks;
9062         units = array_blocks / unit_blocks;
9063         if (array_blocks % unit_blocks)
9064                 units++;
9065         rec->num_migr_units = __cpu_to_le32(units);
9066
9067         rec->post_migr_vol_cap = dev->size_low;
9068         rec->post_migr_vol_cap_hi = dev->size_high;
9069
9070         /* Find the smallest dev */
9071         for (member = info->devs; member; member = member->next) {
9072                 char name[30];
9073                 int fd;
9074
9075                 snprintf(name, sizeof(name), "%d:%d",
9076                          member->disk.major, member->disk.minor);
9077                 fd = dev_open(name, O_RDONLY);
9078                 if (fd < 0)
9079                         continue;
9080                 get_dev_size(fd, NULL, &bytes);
9081                 sectors = bytes / 512;
9082                 if (sectors < smallest)
9083                         smallest = sectors;
9084                 close(fd);
9085         }
9086         rec->ckpt_area_pba = __cpu_to_le32(smallest -
9087                                         RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
9088         write_imsm_migr_rec(st);
9089 }
9090
9091 /*******************************************************************************
9092  * Function:    save_backup_imsm
9093  * Description: Function saves critical data stripes to Migration Copy Area
9094  *              and updates the current migration unit status.
9095  *              Use restore_stripes() to form a destination stripe,
9096  *              and to write it to the Copy Area.
9097  * Parameters:
9098  *      st              : supertype information
9099  *      dev             : imsm device that backup is saved for
9100  *      info            : general array info
9101  *      buf             : input buffer
9102  *      length          : length of data to backup (blocks_per_unit)
9103  * Returns:
9104  *       0 : success
9105  *,     -1 : fail
9106  ******************************************************************************/
9107 int save_backup_imsm(struct supertype *st,
9108                      struct imsm_dev *dev,
9109                      struct mdinfo *info,
9110                      void *buf,
9111                      int length)
9112 {
9113         int rv = -1;
9114         struct intel_super *super = st->sb;
9115         unsigned long long *target_offsets = NULL;
9116         int *targets = NULL;
9117         int i;
9118         struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
9119         int new_disks = map_dest->num_members;
9120         int dest_layout = 0;
9121         int dest_chunk;
9122         unsigned long long start;
9123         int data_disks = imsm_num_data_members(dev, MAP_0);
9124
9125         targets = xmalloc(new_disks * sizeof(int));
9126
9127         for (i = 0; i < new_disks; i++)
9128                 targets[i] = -1;
9129
9130         target_offsets = xcalloc(new_disks, sizeof(unsigned long long));
9131
9132         start = info->reshape_progress * 512;
9133         for (i = 0; i < new_disks; i++) {
9134                 target_offsets[i] = (unsigned long long)
9135                   __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
9136                 /* move back copy area adderss, it will be moved forward
9137                  * in restore_stripes() using start input variable
9138                  */
9139                 target_offsets[i] -= start/data_disks;
9140         }
9141
9142         if (open_backup_targets(info, new_disks, targets,
9143                                 super, dev))
9144                 goto abort;
9145
9146         dest_layout = imsm_level_to_layout(map_dest->raid_level);
9147         dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
9148
9149         if (restore_stripes(targets, /* list of dest devices */
9150                             target_offsets, /* migration record offsets */
9151                             new_disks,
9152                             dest_chunk,
9153                             map_dest->raid_level,
9154                             dest_layout,
9155                             -1,    /* source backup file descriptor */
9156                             0,     /* input buf offset
9157                                     * always 0 buf is already offseted */
9158                             start,
9159                             length,
9160                             buf) != 0) {
9161                 pr_err("Error restoring stripes\n");
9162                 goto abort;
9163         }
9164
9165         rv = 0;
9166
9167 abort:
9168         if (targets) {
9169                 close_targets(targets, new_disks);
9170                 free(targets);
9171         }
9172         free(target_offsets);
9173
9174         return rv;
9175 }
9176
9177 /*******************************************************************************
9178  * Function:    save_checkpoint_imsm
9179  * Description: Function called for current unit status update
9180  *              in the migration record. It writes it to disk.
9181  * Parameters:
9182  *      super   : imsm internal array info
9183  *      info    : general array info
9184  * Returns:
9185  *      0: success
9186  *      1: failure
9187  *      2: failure, means no valid migration record
9188  *                 / no general migration in progress /
9189  ******************************************************************************/
9190 int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
9191 {
9192         struct intel_super *super = st->sb;
9193         unsigned long long blocks_per_unit;
9194         unsigned long long curr_migr_unit;
9195
9196         if (load_imsm_migr_rec(super, info) != 0) {
9197                 dprintf("imsm: ERROR: Cannot read migration record "
9198                         "for checkpoint save.\n");
9199                 return 1;
9200         }
9201
9202         blocks_per_unit = __le32_to_cpu(super->migr_rec->blocks_per_unit);
9203         if (blocks_per_unit == 0) {
9204                 dprintf("imsm: no migration in progress.\n");
9205                 return 2;
9206         }
9207         curr_migr_unit = info->reshape_progress / blocks_per_unit;
9208         /* check if array is alligned to copy area
9209          * if it is not alligned, add one to current migration unit value
9210          * this can happend on array reshape finish only
9211          */
9212         if (info->reshape_progress % blocks_per_unit)
9213                 curr_migr_unit++;
9214
9215         super->migr_rec->curr_migr_unit =
9216                 __cpu_to_le32(curr_migr_unit);
9217         super->migr_rec->rec_status = __cpu_to_le32(state);
9218         super->migr_rec->dest_1st_member_lba =
9219                 __cpu_to_le32(curr_migr_unit *
9220                               __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
9221         if (write_imsm_migr_rec(st) < 0) {
9222                 dprintf("imsm: Cannot write migration record "
9223                         "outside backup area\n");
9224                 return 1;
9225         }
9226
9227         return 0;
9228 }
9229
9230 /*******************************************************************************
9231  * Function:    recover_backup_imsm
9232  * Description: Function recovers critical data from the Migration Copy Area
9233  *              while assembling an array.
9234  * Parameters:
9235  *      super   : imsm internal array info
9236  *      info    : general array info
9237  * Returns:
9238  *      0 : success (or there is no data to recover)
9239  *      1 : fail
9240  ******************************************************************************/
9241 int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
9242 {
9243         struct intel_super *super = st->sb;
9244         struct migr_record *migr_rec = super->migr_rec;
9245         struct imsm_map *map_dest = NULL;
9246         struct intel_dev *id = NULL;
9247         unsigned long long read_offset;
9248         unsigned long long write_offset;
9249         unsigned unit_len;
9250         int *targets = NULL;
9251         int new_disks, i, err;
9252         char *buf = NULL;
9253         int retval = 1;
9254         unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
9255         unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
9256         char buffer[20];
9257         int skipped_disks = 0;
9258
9259         err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
9260         if (err < 1)
9261                 return 1;
9262
9263         /* recover data only during assemblation */
9264         if (strncmp(buffer, "inactive", 8) != 0)
9265                 return 0;
9266         /* no data to recover */
9267         if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
9268                 return 0;
9269         if (curr_migr_unit >= num_migr_units)
9270                 return 1;
9271
9272         /* find device during reshape */
9273         for (id = super->devlist; id; id = id->next)
9274                 if (is_gen_migration(id->dev))
9275                         break;
9276         if (id == NULL)
9277                 return 1;
9278
9279         map_dest = get_imsm_map(id->dev, MAP_0);
9280         new_disks = map_dest->num_members;
9281
9282         read_offset = (unsigned long long)
9283                         __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
9284
9285         write_offset = ((unsigned long long)
9286                         __le32_to_cpu(migr_rec->dest_1st_member_lba) +
9287                         pba_of_lba0(map_dest)) * 512;
9288
9289         unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
9290         if (posix_memalign((void **)&buf, 512, unit_len) != 0)
9291                 goto abort;
9292         targets = xcalloc(new_disks, sizeof(int));
9293
9294         if (open_backup_targets(info, new_disks, targets, super, id->dev)) {
9295                 pr_err("Cannot open some devices belonging to array.\n");
9296                 goto abort;
9297         }
9298
9299         for (i = 0; i < new_disks; i++) {
9300                 if (targets[i] < 0) {
9301                         skipped_disks++;
9302                         continue;
9303                 }
9304                 if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
9305                         pr_err("Cannot seek to block: %s\n",
9306                                strerror(errno));
9307                         skipped_disks++;
9308                         continue;
9309                 }
9310                 if ((unsigned)read(targets[i], buf, unit_len) != unit_len) {
9311                         pr_err("Cannot read copy area block: %s\n",
9312                                strerror(errno));
9313                         skipped_disks++;
9314                         continue;
9315                 }
9316                 if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
9317                         pr_err("Cannot seek to block: %s\n",
9318                                strerror(errno));
9319                         skipped_disks++;
9320                         continue;
9321                 }
9322                 if ((unsigned)write(targets[i], buf, unit_len) != unit_len) {
9323                         pr_err("Cannot restore block: %s\n",
9324                                strerror(errno));
9325                         skipped_disks++;
9326                         continue;
9327                 }
9328         }
9329
9330         if (skipped_disks > imsm_get_allowed_degradation(info->new_level,
9331                                                          new_disks,
9332                                                          super,
9333                                                          id->dev)) {
9334                 pr_err("Cannot restore data from backup."
9335                        " Too many failed disks\n");
9336                 goto abort;
9337         }
9338
9339         if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) {
9340                 /* ignore error == 2, this can mean end of reshape here
9341                  */
9342                 dprintf("imsm: Cannot write checkpoint to "
9343                         "migration record (UNIT_SRC_NORMAL) during restart\n");
9344         } else
9345                 retval = 0;
9346
9347 abort:
9348         if (targets) {
9349                 for (i = 0; i < new_disks; i++)
9350                         if (targets[i])
9351                                 close(targets[i]);
9352                 free(targets);
9353         }
9354         free(buf);
9355         return retval;
9356 }
9357
9358 static char disk_by_path[] = "/dev/disk/by-path/";
9359
9360 static const char *imsm_get_disk_controller_domain(const char *path)
9361 {
9362         char disk_path[PATH_MAX];
9363         char *drv=NULL;
9364         struct stat st;
9365
9366         strncpy(disk_path, disk_by_path, PATH_MAX - 1);
9367         strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
9368         if (stat(disk_path, &st) == 0) {
9369                 struct sys_dev* hba;
9370                 char *path=NULL;
9371
9372                 path = devt_to_devpath(st.st_rdev);
9373                 if (path == NULL)
9374                         return "unknown";
9375                 hba = find_disk_attached_hba(-1, path);
9376                 if (hba && hba->type == SYS_DEV_SAS)
9377                         drv = "isci";
9378                 else if (hba && hba->type == SYS_DEV_SATA)
9379                         drv = "ahci";
9380                 else
9381                         drv = "unknown";
9382                 dprintf("path: %s hba: %s attached: %s\n",
9383                         path, (hba) ? hba->path : "NULL", drv);
9384                 free(path);
9385         }
9386         return drv;
9387 }
9388
9389 static char *imsm_find_array_devnm_by_subdev(int subdev, char *container)
9390 {
9391         static char devnm[32];
9392         char subdev_name[20];
9393         struct mdstat_ent *mdstat;
9394
9395         sprintf(subdev_name, "%d", subdev);
9396         mdstat = mdstat_by_subdev(subdev_name, container);
9397         if (!mdstat)
9398                 return NULL;
9399
9400         strcpy(devnm, mdstat->devnm);
9401         free_mdstat(mdstat);
9402         return devnm;
9403 }
9404
9405 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
9406                                                 struct geo_params *geo,
9407                                                 int *old_raid_disks,
9408                                                 int direction)
9409 {
9410         /* currently we only support increasing the number of devices
9411          * for a container.  This increases the number of device for each
9412          * member array.  They must all be RAID0 or RAID5.
9413          */
9414         int ret_val = 0;
9415         struct mdinfo *info, *member;
9416         int devices_that_can_grow = 0;
9417
9418         dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
9419                 "st->devnm = (%s)\n", st->devnm);
9420
9421         if (geo->size > 0 ||
9422             geo->level != UnSet ||
9423             geo->layout != UnSet ||
9424             geo->chunksize != 0 ||
9425             geo->raid_disks == UnSet) {
9426                 dprintf("imsm: Container operation is allowed for "
9427                         "raid disks number change only.\n");
9428                 return ret_val;
9429         }
9430
9431         if (direction == ROLLBACK_METADATA_CHANGES) {
9432                 dprintf("imsm: Metadata changes rollback is not supported for "
9433                         "container operation.\n");
9434                 return ret_val;
9435         }
9436
9437         info = container_content_imsm(st, NULL);
9438         for (member = info; member; member = member->next) {
9439                 char *result;
9440
9441                 dprintf("imsm: checking device_num: %i\n",
9442                         member->container_member);
9443
9444                 if (geo->raid_disks <= member->array.raid_disks) {
9445                         /* we work on container for Online Capacity Expansion
9446                          * only so raid_disks has to grow
9447                          */
9448                         dprintf("imsm: for container operation raid disks "
9449                                 "increase is required\n");
9450                         break;
9451                 }
9452
9453                 if ((info->array.level != 0) &&
9454                     (info->array.level != 5)) {
9455                         /* we cannot use this container with other raid level
9456                          */
9457                         dprintf("imsm: for container operation wrong"
9458                                 " raid level (%i) detected\n",
9459                                 info->array.level);
9460                         break;
9461                 } else {
9462                         /* check for platform support
9463                          * for this raid level configuration
9464                          */
9465                         struct intel_super *super = st->sb;
9466                         if (!is_raid_level_supported(super->orom,
9467                                                      member->array.level,
9468                                                      geo->raid_disks)) {
9469                                 dprintf("platform does not support raid%d with"
9470                                         " %d disk%s\n",
9471                                          info->array.level,
9472                                          geo->raid_disks,
9473                                          geo->raid_disks > 1 ? "s" : "");
9474                                 break;
9475                         }
9476                         /* check if component size is aligned to chunk size
9477                          */
9478                         if (info->component_size %
9479                             (info->array.chunk_size/512)) {
9480                                 dprintf("Component size is not aligned to "
9481                                         "chunk size\n");
9482                                 break;
9483                         }
9484                 }
9485
9486                 if (*old_raid_disks &&
9487                     info->array.raid_disks != *old_raid_disks)
9488                         break;
9489                 *old_raid_disks = info->array.raid_disks;
9490
9491                 /* All raid5 and raid0 volumes in container
9492                  * have to be ready for Online Capacity Expansion
9493                  * so they need to be assembled.  We have already
9494                  * checked that no recovery etc is happening.
9495                  */
9496                 result = imsm_find_array_devnm_by_subdev(member->container_member,
9497                                                          st->container_devnm);
9498                 if (result == NULL) {
9499                         dprintf("imsm: cannot find array\n");
9500                         break;
9501                 }
9502                 devices_that_can_grow++;
9503         }
9504         sysfs_free(info);
9505         if (!member && devices_that_can_grow)
9506                 ret_val = 1;
9507
9508         if (ret_val)
9509                 dprintf("\tContainer operation allowed\n");
9510         else
9511                 dprintf("\tError: %i\n", ret_val);
9512
9513         return ret_val;
9514 }
9515
/* Function: get_spares_for_grow
 * Description: Asks container_choose_spares() for the spare devices of the
 *              container, passing min_acceptable_spare_size_imsm(st) as the
 *              minimum size.
 * Parameters: Pointer to the supertype structure
 * Returns: Whatever container_choose_spares() returns: per the original
 *              description a list of spare devices (mdinfo structure) on
 *              success, NULL if fail
 */
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
        return container_choose_spares(st,
                                       min_acceptable_spare_size_imsm(st),
                                       NULL, NULL, NULL, 0);
}
9528
9529 /******************************************************************************
9530  * function: imsm_create_metadata_update_for_reshape
9531  * Function creates update for whole IMSM container.
9532  *
9533  ******************************************************************************/
9534 static int imsm_create_metadata_update_for_reshape(
9535         struct supertype *st,
9536         struct geo_params *geo,
9537         int old_raid_disks,
9538         struct imsm_update_reshape **updatep)
9539 {
9540         struct intel_super *super = st->sb;
9541         struct imsm_super *mpb = super->anchor;
9542         int update_memory_size = 0;
9543         struct imsm_update_reshape *u = NULL;
9544         struct mdinfo *spares = NULL;
9545         int i;
9546         int delta_disks = 0;
9547         struct mdinfo *dev;
9548
9549         dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
9550                 geo->raid_disks);
9551
9552         delta_disks = geo->raid_disks - old_raid_disks;
9553
9554         /* size of all update data without anchor */
9555         update_memory_size = sizeof(struct imsm_update_reshape);
9556
9557         /* now add space for spare disks that we need to add. */
9558         update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
9559
9560         u = xcalloc(1, update_memory_size);
9561         u->type = update_reshape_container_disks;
9562         u->old_raid_disks = old_raid_disks;
9563         u->new_raid_disks = geo->raid_disks;
9564
9565         /* now get spare disks list
9566          */
9567         spares = get_spares_for_grow(st);
9568
9569         if (spares == NULL
9570             || delta_disks > spares->array.spare_disks) {
9571                 pr_err("imsm: ERROR: Cannot get spare devices "
9572                         "for %s.\n", geo->dev_name);
9573                 i = -1;
9574                 goto abort;
9575         }
9576
9577         /* we have got spares
9578          * update disk list in imsm_disk list table in anchor
9579          */
9580         dprintf("imsm: %i spares are available.\n\n",
9581                 spares->array.spare_disks);
9582
9583         dev = spares->devs;
9584         for (i = 0; i < delta_disks; i++) {
9585                 struct dl *dl;
9586
9587                 if (dev == NULL)
9588                         break;
9589                 u->new_disks[i] = makedev(dev->disk.major,
9590                                           dev->disk.minor);
9591                 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
9592                 dl->index = mpb->num_disks;
9593                 mpb->num_disks++;
9594                 dev = dev->next;
9595         }
9596
9597 abort:
9598         /* free spares
9599          */
9600         sysfs_free(spares);
9601
9602         dprintf("imsm: reshape update preparation :");
9603         if (i == delta_disks) {
9604                 dprintf(" OK\n");
9605                 *updatep = u;
9606                 return update_memory_size;
9607         }
9608         free(u);
9609         dprintf(" Error\n");
9610
9611         return 0;
9612 }
9613
9614
9615 /******************************************************************************
9616  * function: imsm_create_metadata_update_for_size_change()
9617  *           Creates update for IMSM array for array size change.
9618  *
9619  ******************************************************************************/
9620 static int imsm_create_metadata_update_for_size_change(
9621                                 struct supertype *st,
9622                                 struct geo_params *geo,
9623                                 struct imsm_update_size_change **updatep)
9624 {
9625         struct intel_super *super = st->sb;
9626         int update_memory_size = 0;
9627         struct imsm_update_size_change *u = NULL;
9628
9629         dprintf("imsm_create_metadata_update_for_size_change(enter)"
9630                 " New size = %llu\n", geo->size);
9631
9632         /* size of all update data without anchor */
9633         update_memory_size = sizeof(struct imsm_update_size_change);
9634
9635         u = xcalloc(1, update_memory_size);
9636         u->type = update_size_change;
9637         u->subdev = super->current_vol;
9638         u->new_size = geo->size;
9639
9640         dprintf("imsm: reshape update preparation : OK\n");
9641         *updatep = u;
9642
9643         return update_memory_size;
9644 }
9645
9646 /******************************************************************************
9647  * function: imsm_create_metadata_update_for_migration()
9648  *           Creates update for IMSM array.
9649  *
9650  ******************************************************************************/
9651 static int imsm_create_metadata_update_for_migration(
9652                                         struct supertype *st,
9653                                         struct geo_params *geo,
9654                                         struct imsm_update_reshape_migration **updatep)
9655 {
9656         struct intel_super *super = st->sb;
9657         int update_memory_size = 0;
9658         struct imsm_update_reshape_migration *u = NULL;
9659         struct imsm_dev *dev;
9660         int previous_level = -1;
9661
9662         dprintf("imsm_create_metadata_update_for_migration(enter)"
9663                 " New Level = %i\n", geo->level);
9664
9665         /* size of all update data without anchor */
9666         update_memory_size = sizeof(struct imsm_update_reshape_migration);
9667
9668         u = xcalloc(1, update_memory_size);
9669         u->type = update_reshape_migration;
9670         u->subdev = super->current_vol;
9671         u->new_level = geo->level;
9672         u->new_layout = geo->layout;
9673         u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
9674         u->new_disks[0] = -1;
9675         u->new_chunksize = -1;
9676
9677         dev = get_imsm_dev(super, u->subdev);
9678         if (dev) {
9679                 struct imsm_map *map;
9680
9681                 map = get_imsm_map(dev, MAP_0);
9682                 if (map) {
9683                         int current_chunk_size =
9684                                 __le16_to_cpu(map->blocks_per_strip) / 2;
9685
9686                         if (geo->chunksize != current_chunk_size) {
9687                                 u->new_chunksize = geo->chunksize / 1024;
9688                                 dprintf("imsm: "
9689                                         "chunk size change from %i to %i\n",
9690                                         current_chunk_size, u->new_chunksize);
9691                         }
9692                         previous_level = map->raid_level;
9693                 }
9694         }
9695         if ((geo->level == 5) && (previous_level == 0)) {
9696                 struct mdinfo *spares = NULL;
9697
9698                 u->new_raid_disks++;
9699                 spares = get_spares_for_grow(st);
9700                 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
9701                         free(u);
9702                         sysfs_free(spares);
9703                         update_memory_size = 0;
9704                         dprintf("error: cannot get spare device "
9705                                 "for requested migration");
9706                         return 0;
9707                 }
9708                 sysfs_free(spares);
9709         }
9710         dprintf("imsm: reshape update preparation : OK\n");
9711         *updatep = u;
9712
9713         return update_memory_size;
9714 }
9715
9716 static void imsm_update_metadata_locally(struct supertype *st,
9717                                          void *buf, int len)
9718 {
9719         struct metadata_update mu;
9720
9721         mu.buf = buf;
9722         mu.len = len;
9723         mu.space = NULL;
9724         mu.space_list = NULL;
9725         mu.next = NULL;
9726         imsm_prepare_update(st, &mu);
9727         imsm_process_update(st, &mu);
9728
9729         while (mu.space_list) {
9730                 void **space = mu.space_list;
9731                 mu.space_list = *space;
9732                 free(space);
9733         }
9734 }
9735
/***************************************************************************
* Function:     imsm_analyze_change
* Description:  Analyzes the requested change for a single volume
*               and validates that the transition is supported
* Parameters:   Supertype structure, geometry parameters,
*               metadata change direction (apply/rollback)
* Returns:      Operation type code on success, -1 on failure
****************************************************************************/
9744 enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
9745                                            struct geo_params *geo,
9746                                            int direction)
9747 {
9748         struct mdinfo info;
9749         int change = -1;
9750         int check_devs = 0;
9751         int chunk;
9752         /* number of added/removed disks in operation result */
9753         int devNumChange = 0;
9754         /* imsm compatible layout value for array geometry verification */
9755         int imsm_layout = -1;
9756         int data_disks;
9757         struct imsm_dev *dev;
9758         struct intel_super *super;
9759         unsigned long long current_size;
9760         unsigned long long free_size;
9761         unsigned long long max_size;
9762         int rv;
9763
9764         getinfo_super_imsm_volume(st, &info, NULL);
9765         if ((geo->level != info.array.level) &&
9766             (geo->level >= 0) &&
9767             (geo->level != UnSet)) {
9768                 switch (info.array.level) {
9769                 case 0:
9770                         if (geo->level == 5) {
9771                                 change = CH_MIGRATION;
9772                                 if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) {
9773                                         pr_err("Error. Requested Layout "
9774                                                "not supported (left-asymmetric layout "
9775                                                "is supported only)!\n");
9776                                         change = -1;
9777                                         goto analyse_change_exit;
9778                                 }
9779                                 imsm_layout =  geo->layout;
9780                                 check_devs = 1;
9781                                 devNumChange = 1; /* parity disk added */
9782                         } else if (geo->level == 10) {
9783                                 change = CH_TAKEOVER;
9784                                 check_devs = 1;
9785                                 devNumChange = 2; /* two mirrors added */
9786                                 imsm_layout = 0x102; /* imsm supported layout */
9787                         }
9788                         break;
9789                 case 1:
9790                 case 10:
9791                         if (geo->level == 0) {
9792                                 change = CH_TAKEOVER;
9793                                 check_devs = 1;
9794                                 devNumChange = -(geo->raid_disks/2);
9795                                 imsm_layout = 0; /* imsm raid0 layout */
9796                         }
9797                         break;
9798                 }
9799                 if (change == -1) {
9800                         pr_err("Error. Level Migration from %d to %d "
9801                                "not supported!\n",
9802                                info.array.level, geo->level);
9803                         goto analyse_change_exit;
9804                 }
9805         } else
9806                 geo->level = info.array.level;
9807
9808         if ((geo->layout != info.array.layout)
9809             && ((geo->layout != UnSet) && (geo->layout != -1))) {
9810                 change = CH_MIGRATION;
9811                 if ((info.array.layout == 0)
9812                     && (info.array.level == 5)
9813                     && (geo->layout == 5)) {
9814                         /* reshape 5 -> 4 */
9815                 } else if ((info.array.layout == 5)
9816                            && (info.array.level == 5)
9817                            && (geo->layout == 0)) {
9818                         /* reshape 4 -> 5 */
9819                         geo->layout = 0;
9820                         geo->level = 5;
9821                 } else {
9822                         pr_err("Error. Layout Migration from %d to %d "
9823                                "not supported!\n",
9824                                info.array.layout, geo->layout);
9825                         change = -1;
9826                         goto analyse_change_exit;
9827                 }
9828         } else {
9829                 geo->layout = info.array.layout;
9830                 if (imsm_layout == -1)
9831                         imsm_layout = info.array.layout;
9832         }
9833
9834         if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
9835             && (geo->chunksize != info.array.chunk_size))
9836                 change = CH_MIGRATION;
9837         else
9838                 geo->chunksize = info.array.chunk_size;
9839
9840         chunk = geo->chunksize / 1024;
9841
9842         super = st->sb;
9843         dev = get_imsm_dev(super, super->current_vol);
9844         data_disks = imsm_num_data_members(dev , MAP_0);
9845         /* compute current size per disk member
9846          */
9847         current_size = info.custom_array_size / data_disks;
9848
9849         if ((geo->size > 0) && (geo->size != MAX_SIZE)) {
9850                 /* align component size
9851                  */
9852                 geo->size = imsm_component_size_aligment_check(
9853                                     get_imsm_raid_level(dev->vol.map),
9854                                     chunk * 1024,
9855                                     geo->size * 2);
9856                 if (geo->size == 0) {
9857                         pr_err("Error. Size expansion is " \
9858                                    "supported only (current size is %llu, " \
9859                                    "requested size /rounded/ is 0).\n",
9860                                    current_size);
9861                         goto analyse_change_exit;
9862                 }
9863         }
9864
9865         if ((current_size != geo->size) && (geo->size > 0)) {
9866                 if (change != -1) {
9867                         pr_err("Error. Size change should be the only "
9868                                 "one at a time.\n");
9869                         change = -1;
9870                         goto analyse_change_exit;
9871                 }
9872                 if ((super->current_vol + 1) != super->anchor->num_raid_devs) {
9873                         pr_err("Error. The last volume in container "
9874                                "can be expanded only (%i/%s).\n",
9875                                super->current_vol, st->devnm);
9876                         goto analyse_change_exit;
9877                 }
9878                 /* check the maximum available size
9879                  */
9880                 rv =  imsm_get_free_size(st, dev->vol.map->num_members,
9881                                          0, chunk, &free_size);
9882                 if (rv == 0)
9883                         /* Cannot find maximum available space
9884                          */
9885                         max_size = 0;
9886                 else {
9887                         max_size = free_size + current_size;
9888                         /* align component size
9889                          */
9890                         max_size = imsm_component_size_aligment_check(
9891                                         get_imsm_raid_level(dev->vol.map),
9892                                         chunk * 1024,
9893                                         max_size);
9894                 }
9895                 if (geo->size == MAX_SIZE) {
9896                         /* requested size change to the maximum available size
9897                          */
9898                         if (max_size == 0) {
9899                                 pr_err("Error. Cannot find "
9900                                         "maximum available space.\n");
9901                                 change = -1;
9902                                 goto analyse_change_exit;
9903                         } else
9904                                 geo->size = max_size;
9905                 }
9906
9907                 if ((direction == ROLLBACK_METADATA_CHANGES)) {
9908                         /* accept size for rollback only
9909                         */
9910                 } else {
9911                         /* round size due to metadata compatibility
9912                         */
9913                         geo->size = (geo->size >> SECT_PER_MB_SHIFT)
9914                                     << SECT_PER_MB_SHIFT;
9915                         dprintf("Prepare update for size change to %llu\n",
9916                                 geo->size );
9917                         if (current_size >= geo->size) {
9918                                 pr_err("Error. Size expansion is "
9919                                        "supported only (current size is %llu, "
9920                                        "requested size /rounded/ is %llu).\n",
9921                                        current_size, geo->size);
9922                                 goto analyse_change_exit;
9923                         }
9924                         if (max_size && geo->size > max_size) {
9925                                 pr_err("Error. Requested size is larger "
9926                                        "than maximum available size (maximum "
9927                                        "available size is %llu, "
9928                                        "requested size /rounded/ is %llu).\n",
9929                                        max_size, geo->size);
9930                                 goto analyse_change_exit;
9931                         }
9932                 }
9933                 geo->size *= data_disks;
9934                 geo->raid_disks = dev->vol.map->num_members;
9935                 change = CH_ARRAY_SIZE;
9936         }
9937         if (!validate_geometry_imsm(st,
9938                                     geo->level,
9939                                     imsm_layout,
9940                                     geo->raid_disks + devNumChange,
9941                                     &chunk,
9942                                     geo->size, INVALID_SECTORS,
9943                                     0, 0, 1))
9944                 change = -1;
9945
9946         if (check_devs) {
9947                 struct intel_super *super = st->sb;
9948                 struct imsm_super *mpb = super->anchor;
9949
9950                 if (mpb->num_raid_devs > 1) {
9951                         pr_err("Error. Cannot perform operation on %s"
9952                                "- for this operation it MUST be single "
9953                                "array in container\n",
9954                                geo->dev_name);
9955                         change = -1;
9956                 }
9957         }
9958
9959 analyse_change_exit:
9960         if ((direction == ROLLBACK_METADATA_CHANGES) &&
9961              ((change == CH_MIGRATION) || (change == CH_TAKEOVER))) {
9962                 dprintf("imsm: Metadata changes rollback is not supported for "
9963                         "migration and takeover operations.\n");
9964                 change = -1;
9965         }
9966         return change;
9967 }
9968
9969 int imsm_takeover(struct supertype *st, struct geo_params *geo)
9970 {
9971         struct intel_super *super = st->sb;
9972         struct imsm_update_takeover *u;
9973
9974         u = xmalloc(sizeof(struct imsm_update_takeover));
9975
9976         u->type = update_takeover;
9977         u->subarray = super->current_vol;
9978
9979         /* 10->0 transition */
9980         if (geo->level == 0)
9981                 u->direction = R10_TO_R0;
9982
9983         /* 0->10 transition */
9984         if (geo->level == 10)
9985                 u->direction = R0_TO_R10;
9986
9987         /* update metadata locally */
9988         imsm_update_metadata_locally(st, u,
9989                                         sizeof(struct imsm_update_takeover));
9990         /* and possibly remotely */
9991         if (st->update_tail)
9992                 append_metadata_update(st, u,
9993                                         sizeof(struct imsm_update_takeover));
9994         else
9995                 free(u);
9996
9997         return 0;
9998 }
9999
10000 static int imsm_reshape_super(struct supertype *st, unsigned long long size,
10001                               int level,
10002                               int layout, int chunksize, int raid_disks,
10003                               int delta_disks, char *backup, char *dev,
10004                               int direction, int verbose)
10005 {
10006         int ret_val = 1;
10007         struct geo_params geo;
10008
10009         dprintf("imsm: reshape_super called.\n");
10010
10011         memset(&geo, 0, sizeof(struct geo_params));
10012
10013         geo.dev_name = dev;
10014         strcpy(geo.devnm, st->devnm);
10015         geo.size = size;
10016         geo.level = level;
10017         geo.layout = layout;
10018         geo.chunksize = chunksize;
10019         geo.raid_disks = raid_disks;
10020         if (delta_disks != UnSet)
10021                 geo.raid_disks += delta_disks;
10022
10023         dprintf("\tfor level      : %i\n", geo.level);
10024         dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
10025
10026         if (experimental() == 0)
10027                 return ret_val;
10028
10029         if (strcmp(st->container_devnm, st->devnm) == 0) {
10030                 /* On container level we can only increase number of devices. */
10031                 dprintf("imsm: info: Container operation\n");
10032                 int old_raid_disks = 0;
10033
10034                 if (imsm_reshape_is_allowed_on_container(
10035                             st, &geo, &old_raid_disks, direction)) {
10036                         struct imsm_update_reshape *u = NULL;
10037                         int len;
10038
10039                         len = imsm_create_metadata_update_for_reshape(
10040                                 st, &geo, old_raid_disks, &u);
10041
10042                         if (len <= 0) {
10043                                 dprintf("imsm: Cannot prepare update\n");
10044                                 goto exit_imsm_reshape_super;
10045                         }
10046
10047                         ret_val = 0;
10048                         /* update metadata locally */
10049                         imsm_update_metadata_locally(st, u, len);
10050                         /* and possibly remotely */
10051                         if (st->update_tail)
10052                                 append_metadata_update(st, u, len);
10053                         else
10054                                 free(u);
10055
10056                 } else {
10057                         pr_err("(imsm) Operation "
10058                                 "is not allowed on this container\n");
10059                 }
10060         } else {
10061                 /* On volume level we support following operations
10062                  * - takeover: raid10 -> raid0; raid0 -> raid10
10063                  * - chunk size migration
10064                  * - migration: raid5 -> raid0; raid0 -> raid5
10065                  */
10066                 struct intel_super *super = st->sb;
10067                 struct intel_dev *dev = super->devlist;
10068                 int change;
10069                 dprintf("imsm: info: Volume operation\n");
10070                 /* find requested device */
10071                 while (dev) {
10072                         char *devnm =
10073                                 imsm_find_array_devnm_by_subdev(
10074                                         dev->index, st->container_devnm);
10075                         if (devnm && strcmp(devnm, geo.devnm) == 0)
10076                                 break;
10077                         dev = dev->next;
10078                 }
10079                 if (dev == NULL) {
10080                         pr_err("Cannot find %s (%s) subarray\n",
10081                                 geo.dev_name, geo.devnm);
10082                         goto exit_imsm_reshape_super;
10083                 }
10084                 super->current_vol = dev->index;
10085                 change = imsm_analyze_change(st, &geo, direction);
10086                 switch (change) {
10087                 case CH_TAKEOVER:
10088                         ret_val = imsm_takeover(st, &geo);
10089                         break;
10090                 case CH_MIGRATION: {
10091                         struct imsm_update_reshape_migration *u = NULL;
10092                         int len =
10093                                 imsm_create_metadata_update_for_migration(
10094                                         st, &geo, &u);
10095                         if (len < 1) {
10096                                 dprintf("imsm: "
10097                                         "Cannot prepare update\n");
10098                                 break;
10099                         }
10100                         ret_val = 0;
10101                         /* update metadata locally */
10102                         imsm_update_metadata_locally(st, u, len);
10103                         /* and possibly remotely */
10104                         if (st->update_tail)
10105                                 append_metadata_update(st, u, len);
10106                         else
10107                                 free(u);
10108                 }
10109                 break;
10110                 case CH_ARRAY_SIZE: {
10111                         struct imsm_update_size_change *u = NULL;
10112                         int len =
10113                                 imsm_create_metadata_update_for_size_change(
10114                                         st, &geo, &u);
10115                         if (len < 1) {
10116                                 dprintf("imsm: "
10117                                         "Cannot prepare update\n");
10118                                 break;
10119                         }
10120                         ret_val = 0;
10121                         /* update metadata locally */
10122                         imsm_update_metadata_locally(st, u, len);
10123                         /* and possibly remotely */
10124                         if (st->update_tail)
10125                                 append_metadata_update(st, u, len);
10126                         else
10127                                 free(u);
10128                 }
10129                 break;
10130                 default:
10131                         ret_val = 1;
10132                 }
10133         }
10134
10135 exit_imsm_reshape_super:
10136         dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
10137         return ret_val;
10138 }
10139
10140 /*******************************************************************************
10141  * Function:    wait_for_reshape_imsm
10142  * Description: Function writes new sync_max value and waits until
10143  *              reshape process reach new position
10144  * Parameters:
10145  *      sra             : general array info
10146  *      ndata           : number of disks in new array's layout
10147  * Returns:
10148  *       0 : success,
10149  *       1 : there is no reshape in progress,
10150  *      -1 : fail
10151  ******************************************************************************/
10152 int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
10153 {
10154         int fd = sysfs_get_fd(sra, NULL, "reshape_position");
10155         unsigned long long completed;
10156         /* to_complete : new sync_max position */
10157         unsigned long long to_complete = sra->reshape_progress;
10158         unsigned long long position_to_set = to_complete / ndata;
10159
10160         if (fd < 0) {
10161                 dprintf("imsm: wait_for_reshape_imsm() "
10162                         "cannot open reshape_position\n");
10163                 return 1;
10164         }
10165
10166         if (sysfs_fd_get_ll(fd, &completed) < 0) {
10167                 dprintf("imsm: wait_for_reshape_imsm() "
10168                         "cannot read reshape_position (no reshape in progres)\n");
10169                 close(fd);
10170                 return 0;
10171         }
10172
10173         if (completed > to_complete) {
10174                 dprintf("imsm: wait_for_reshape_imsm() "
10175                         "wrong next position to set %llu (%llu)\n",
10176                         to_complete, completed);
10177                 close(fd);
10178                 return -1;
10179         }
10180         dprintf("Position set: %llu\n", position_to_set);
10181         if (sysfs_set_num(sra, NULL, "sync_max",
10182                           position_to_set) != 0) {
10183                 dprintf("imsm: wait_for_reshape_imsm() "
10184                         "cannot set reshape position to %llu\n",
10185                         position_to_set);
10186                 close(fd);
10187                 return -1;
10188         }
10189
10190         do {
10191                 char action[20];
10192                 fd_set rfds;
10193                 FD_ZERO(&rfds);
10194                 FD_SET(fd, &rfds);
10195                 select(fd+1, &rfds, NULL, NULL, NULL);
10196                 if (sysfs_get_str(sra, NULL, "sync_action",
10197                                   action, 20) > 0 &&
10198                                 strncmp(action, "reshape", 7) != 0)
10199                         break;
10200                 if (sysfs_fd_get_ll(fd, &completed) < 0) {
10201                         dprintf("imsm: wait_for_reshape_imsm() "
10202                                 "cannot read reshape_position (in loop)\n");
10203                         close(fd);
10204                         return 1;
10205                 }
10206         } while (completed < to_complete);
10207         close(fd);
10208         return 0;
10209
10210 }
10211
10212 /*******************************************************************************
10213  * Function:    check_degradation_change
10214  * Description: Check that array hasn't become failed.
10215  * Parameters:
10216  *      info    : for sysfs access
10217  *      sources : source disks descriptors
10218  *      degraded: previous degradation level
10219  * Returns:
10220  *      degradation level
10221  ******************************************************************************/
10222 int check_degradation_change(struct mdinfo *info,
10223                              int *sources,
10224                              int degraded)
10225 {
10226         unsigned long long new_degraded;
10227         int rv;
10228
10229         rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
10230         if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
10231                 /* check each device to ensure it is still working */
10232                 struct mdinfo *sd;
10233                 new_degraded = 0;
10234                 for (sd = info->devs ; sd ; sd = sd->next) {
10235                         if (sd->disk.state & (1<<MD_DISK_FAULTY))
10236                                 continue;
10237                         if (sd->disk.state & (1<<MD_DISK_SYNC)) {
10238                                 char sbuf[20];
10239                                 if (sysfs_get_str(info,
10240                                         sd, "state", sbuf, 20) < 0 ||
10241                                         strstr(sbuf, "faulty") ||
10242                                         strstr(sbuf, "in_sync") == NULL) {
10243                                         /* this device is dead */
10244                                         sd->disk.state = (1<<MD_DISK_FAULTY);
10245                                         if (sd->disk.raid_disk >= 0 &&
10246                                             sources[sd->disk.raid_disk] >= 0) {
10247                                                 close(sources[
10248                                                         sd->disk.raid_disk]);
10249                                                 sources[sd->disk.raid_disk] =
10250                                                         -1;
10251                                         }
10252                                         new_degraded++;
10253                                 }
10254                         }
10255                 }
10256         }
10257
10258         return new_degraded;
10259 }
10260
10261 /*******************************************************************************
10262  * Function:    imsm_manage_reshape
10263  * Description: Function finds array under reshape and it manages reshape
10264  *              process. It creates stripes backups (if required) and sets
10265  *              checheckpoits.
10266  * Parameters:
10267  *      afd             : Backup handle (nattive) - not used
10268  *      sra             : general array info
10269  *      reshape         : reshape parameters - not used
10270  *      st              : supertype structure
10271  *      blocks          : size of critical section [blocks]
10272  *      fds             : table of source device descriptor
10273  *      offsets         : start of array (offest per devices)
10274  *      dests           : not used
10275  *      destfd          : table of destination device descriptor
10276  *      destoffsets     : table of destination offsets (per device)
10277  * Returns:
10278  *      1 : success, reshape is done
10279  *      0 : fail
10280  ******************************************************************************/
10281 static int imsm_manage_reshape(
10282         int afd, struct mdinfo *sra, struct reshape *reshape,
10283         struct supertype *st, unsigned long backup_blocks,
10284         int *fds, unsigned long long *offsets,
10285         int dests, int *destfd, unsigned long long *destoffsets)
10286 {
10287         int ret_val = 0;
10288         struct intel_super *super = st->sb;
10289         struct intel_dev *dv = NULL;
10290         struct imsm_dev *dev = NULL;
10291         struct imsm_map *map_src;
10292         int migr_vol_qan = 0;
10293         int ndata, odata; /* [bytes] */
10294         int chunk; /* [bytes] */
10295         struct migr_record *migr_rec;
10296         char *buf = NULL;
10297         unsigned int buf_size; /* [bytes] */
10298         unsigned long long max_position; /* array size [bytes] */
10299         unsigned long long next_step; /* [blocks]/[bytes] */
10300         unsigned long long old_data_stripe_length;
10301         unsigned long long start_src; /* [bytes] */
10302         unsigned long long start; /* [bytes] */
10303         unsigned long long start_buf_shift; /* [bytes] */
10304         int degraded = 0;
10305         int source_layout = 0;
10306
10307         if (!fds || !offsets || !sra)
10308                 goto abort;
10309
10310         /* Find volume during the reshape */
10311         for (dv = super->devlist; dv; dv = dv->next) {
10312                 if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
10313                     && dv->dev->vol.migr_state == 1) {
10314                         dev = dv->dev;
10315                         migr_vol_qan++;
10316                 }
10317         }
10318         /* Only one volume can migrate at the same time */
10319         if (migr_vol_qan != 1) {
10320                 pr_err(": %s", migr_vol_qan ?
10321                         "Number of migrating volumes greater than 1\n" :
10322                         "There is no volume during migrationg\n");
10323                 goto abort;
10324         }
10325
10326         map_src = get_imsm_map(dev, MAP_1);
10327         if (map_src == NULL)
10328                 goto abort;
10329
10330         ndata = imsm_num_data_members(dev, MAP_0);
10331         odata = imsm_num_data_members(dev, MAP_1);
10332
10333         chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
10334         old_data_stripe_length = odata * chunk;
10335
10336         migr_rec = super->migr_rec;
10337
10338         /* initialize migration record for start condition */
10339         if (sra->reshape_progress == 0)
10340                 init_migr_record_imsm(st, dev, sra);
10341         else {
10342                 if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) {
10343                         dprintf("imsm: cannot restart migration when data "
10344                                 "are present in copy area.\n");
10345                         goto abort;
10346                 }
10347                 /* Save checkpoint to update migration record for current
10348                  * reshape position (in md). It can be farther than current
10349                  * reshape position in metadata.
10350                  */
10351                 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
10352                         /* ignore error == 2, this can mean end of reshape here
10353                          */
10354                         dprintf("imsm: Cannot write checkpoint to "
10355                                 "migration record (UNIT_SRC_NORMAL, "
10356                                 "initial save)\n");
10357                         goto abort;
10358                 }
10359         }
10360
10361         /* size for data */
10362         buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
10363         /* extend  buffer size for parity disk */
10364         buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
10365         /* add space for stripe aligment */
10366         buf_size += old_data_stripe_length;
10367         if (posix_memalign((void **)&buf, 4096, buf_size)) {
10368                 dprintf("imsm: Cannot allocate checpoint buffer\n");
10369                 goto abort;
10370         }
10371
10372         max_position = sra->component_size * ndata;
10373         source_layout = imsm_level_to_layout(map_src->raid_level);
10374
10375         while (__le32_to_cpu(migr_rec->curr_migr_unit) <
10376                __le32_to_cpu(migr_rec->num_migr_units)) {
10377                 /* current reshape position [blocks] */
10378                 unsigned long long current_position =
10379                         __le32_to_cpu(migr_rec->blocks_per_unit)
10380                         * __le32_to_cpu(migr_rec->curr_migr_unit);
10381                 unsigned long long border;
10382
10383                 /* Check that array hasn't become failed.
10384                  */
10385                 degraded = check_degradation_change(sra, fds, degraded);
10386                 if (degraded > 1) {
10387                         dprintf("imsm: Abort reshape due to degradation"
10388                                 " level (%i)\n", degraded);
10389                         goto abort;
10390                 }
10391
10392                 next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
10393
10394                 if ((current_position + next_step) > max_position)
10395                         next_step = max_position - current_position;
10396
10397                 start = current_position * 512;
10398
10399                 /* allign reading start to old geometry */
10400                 start_buf_shift = start % old_data_stripe_length;
10401                 start_src = start - start_buf_shift;
10402
10403                 border = (start_src / odata) - (start / ndata);
10404                 border /= 512;
10405                 if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
10406                         /* save critical stripes to buf
10407                          * start     - start address of current unit
10408                          *             to backup [bytes]
10409                          * start_src - start address of current unit
10410                          *             to backup alligned to source array
10411                          *             [bytes]
10412                          */
10413                         unsigned long long next_step_filler = 0;
10414                         unsigned long long copy_length = next_step * 512;
10415
10416                         /* allign copy area length to stripe in old geometry */
10417                         next_step_filler = ((copy_length + start_buf_shift)
10418                                             % old_data_stripe_length);
10419                         if (next_step_filler)
10420                                 next_step_filler = (old_data_stripe_length
10421                                                     - next_step_filler);
10422                         dprintf("save_stripes() parameters: start = %llu,"
10423                                 "\tstart_src = %llu,\tnext_step*512 = %llu,"
10424                                 "\tstart_in_buf_shift = %llu,"
10425                                 "\tnext_step_filler = %llu\n",
10426                                 start, start_src, copy_length,
10427                                 start_buf_shift, next_step_filler);
10428
10429                         if (save_stripes(fds, offsets, map_src->num_members,
10430                                          chunk, map_src->raid_level,
10431                                          source_layout, 0, NULL, start_src,
10432                                          copy_length +
10433                                          next_step_filler + start_buf_shift,
10434                                          buf)) {
10435                                 dprintf("imsm: Cannot save stripes"
10436                                         " to buffer\n");
10437                                 goto abort;
10438                         }
10439                         /* Convert data to destination format and store it
10440                          * in backup general migration area
10441                          */
10442                         if (save_backup_imsm(st, dev, sra,
10443                                 buf + start_buf_shift, copy_length)) {
10444                                 dprintf("imsm: Cannot save stripes to "
10445                                         "target devices\n");
10446                                 goto abort;
10447                         }
10448                         if (save_checkpoint_imsm(st, sra,
10449                                                  UNIT_SRC_IN_CP_AREA)) {
10450                                 dprintf("imsm: Cannot write checkpoint to "
10451                                         "migration record (UNIT_SRC_IN_CP_AREA)\n");
10452                                 goto abort;
10453                         }
10454                 } else {
10455                         /* set next step to use whole border area */
10456                         border /= next_step;
10457                         if (border > 1)
10458                                 next_step *= border;
10459                 }
10460                 /* When data backed up, checkpoint stored,
10461                  * kick the kernel to reshape unit of data
10462                  */
10463                 next_step = next_step + sra->reshape_progress;
10464                 /* limit next step to array max position */
10465                 if (next_step > max_position)
10466                         next_step = max_position;
10467                 sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
10468                 sysfs_set_num(sra, NULL, "suspend_hi", next_step);
10469                 sra->reshape_progress = next_step;
10470
10471                 /* wait until reshape finish */
10472                 if (wait_for_reshape_imsm(sra, ndata) < 0) {
10473                         dprintf("wait_for_reshape_imsm returned error!\n");
10474                         goto abort;
10475                 }
10476
10477                 if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
10478                         /* ignore error == 2, this can mean end of reshape here
10479                          */
10480                         dprintf("imsm: Cannot write checkpoint to "
10481                                 "migration record (UNIT_SRC_NORMAL)\n");
10482                         goto abort;
10483                 }
10484
10485         }
10486
10487         /* return '1' if done */
10488         ret_val = 1;
10489 abort:
10490         free(buf);
10491         abort_reshape(sra);
10492
10493         return ret_val;
10494 }
10495 #endif /* MDASSEMBLE */
10496
10497 struct superswitch super_imsm = { /* superswitch instance that wires each member to its imsm (Intel Matrix Storage Manager) implementation */
10498 #ifndef MDASSEMBLE /* members in this group are initialized only when MDASSEMBLE is not defined; otherwise they are left zero-initialized */
10499         .examine_super  = examine_super_imsm,
10500         .brief_examine_super = brief_examine_super_imsm,
10501         .brief_examine_subarrays = brief_examine_subarrays_imsm,
10502         .export_examine_super = export_examine_super_imsm,
10503         .detail_super   = detail_super_imsm,
10504         .brief_detail_super = brief_detail_super_imsm,
10505         .write_init_super = write_init_super_imsm,
10506         .validate_geometry = validate_geometry_imsm,
10507         .add_to_super   = add_to_super_imsm,
10508         .remove_from_super = remove_from_super_imsm,
10509         .detail_platform = detail_platform_imsm,
10510         .export_detail_platform = export_detail_platform_imsm,
10511         .kill_subarray = kill_subarray_imsm,
10512         .update_subarray = update_subarray_imsm,
10513         .load_container = load_container_imsm,
10514         .default_geometry = default_geometry_imsm,
10515         .get_disk_controller_domain = imsm_get_disk_controller_domain,
10516         .reshape_super  = imsm_reshape_super,
10517         .manage_reshape = imsm_manage_reshape,
10518         .recover_backup = recover_backup_imsm,
10519         .copy_metadata = copy_metadata_imsm,
10520 #endif /* MDASSEMBLE */
10521         .match_home     = match_home_imsm, /* from here through .name the members are initialized in every build */
10522         .uuid_from_super= uuid_from_super_imsm,
10523         .getinfo_super  = getinfo_super_imsm,
10524         .getinfo_super_disks = getinfo_super_disks_imsm,
10525         .update_super   = update_super_imsm,
10526
10527         .avail_size     = avail_size_imsm,
10528         .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
10529
10530         .compare_super  = compare_super_imsm,
10531
10532         .load_super     = load_super_imsm,
10533         .init_super     = init_super_imsm,
10534         .store_super    = store_super_imsm,
10535         .free_super     = free_super_imsm,
10536         .match_metadata_desc = match_metadata_desc_imsm,
10537         .container_content = container_content_imsm,
10538
10539         /* plain data members (a number and a string), not function handlers */
10540         .external       = 1, /* NOTE(review): presumably flags imsm as an externally managed metadata format -- confirm against struct superswitch */
10541         .name = "imsm", /* name string stored in this superswitch */
10542
10543 #ifndef MDASSEMBLE
10544 /* for mdmon; like the first group, these members are initialized only when MDASSEMBLE is not defined */
10545         .open_new       = imsm_open_new,
10546         .set_array_state= imsm_set_array_state,
10547         .set_disk       = imsm_set_disk,
10548         .sync_metadata  = imsm_sync_metadata,
10549         .activate_spare = imsm_activate_spare,
10550         .process_update = imsm_process_update,
10551         .prepare_update = imsm_prepare_update,
10552 #endif /* MDASSEMBLE */
10553 };