#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
#define MPB_SECTOR_CNT 2210
-#define IMSM_RESERVED_SECTORS 4096
-#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
+#define IMSM_RESERVED_SECTORS 8192
+#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2048
#define SECT_PER_MB_SHIFT 11
#define MAX_SECTOR_SIZE 4096
#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
* mutliple PPL area
*/
+/*
+ * The internal write-intent bitmap is stored in the same area as the PPL.
+ * The two features are mutually exclusive, so sharing the area is not an issue.
+ * The first 8KiB of the area is reserved and shall not be used.
+ */
+#define IMSM_BITMAP_AREA_RESERVED_SIZE 8192
+
+#define IMSM_BITMAP_HEADER_OFFSET (IMSM_BITMAP_AREA_RESERVED_SIZE)
+#define IMSM_BITMAP_HEADER_SIZE MAX_SECTOR_SIZE
+
+#define IMSM_BITMAP_START_OFFSET (IMSM_BITMAP_HEADER_OFFSET + IMSM_BITMAP_HEADER_SIZE)
+#define IMSM_BITMAP_AREA_SIZE (MULTIPLE_PPL_AREA_SIZE_IMSM - IMSM_BITMAP_START_OFFSET)
+#define IMSM_BITMAP_AND_HEADER_SIZE (IMSM_BITMAP_AREA_SIZE + IMSM_BITMAP_HEADER_SIZE)
+
+#define IMSM_DEFAULT_BITMAP_CHUNKSIZE (64 * 1024 * 1024)
+#define IMSM_DEFAULT_BITMAP_DAEMON_SLEEP 5
+
+/*
+ * This macro lets us ensure that no-one accidentally
+ * changes the size of a struct.
+ *
+ * When sizeof(struct _struct) differs from size, the second case label
+ * evaluates to 0 and collides with "case 0", so the compiler rejects the
+ * duplicate case value.  When the sizes match the label is 1 and the
+ * generated helper is a harmless, never-called inline function.
+ */
+#define ASSERT_SIZE(_struct, size) \
+static inline void __assert_size_##_struct(void) \
+{ \
+	switch (0) { \
+	case 0: break; \
+	case (sizeof(struct _struct) == (size)): break; \
+	} \
+}
+
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
#define IMSM_DISK_FILLERS 3
__u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */
};
+ASSERT_SIZE(imsm_disk, 48)
/* map selector for map managment
*/
__u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
* top byte contains some flags
*/
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_map, 52)
struct imsm_vol {
__u32 curr_migr_unit;
__u32 filler[4];
struct imsm_map map[1];
/* here comes another one if migr_state */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_vol, 84)
struct imsm_dev {
__u8 volume[MAX_RAID_SERIAL_LEN];
#define RWH_MULTIPLE_DISTRIBUTED 3
#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
#define RWH_MULTIPLE_OFF 5
+#define RWH_BITMAP 6
__u8 rwh_policy; /* Raid Write Hole Policy */
__u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
__u8 filler1;
#define IMSM_DEV_FILLERS 3
__u32 filler[IMSM_DEV_FILLERS];
struct imsm_vol vol;
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_dev, 164)
struct imsm_super {
__u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
* (starts at 1)
*/
__u16 filler1; /* 0x4E - 0x4F */
-#define IMSM_FILLERS 34
- __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */
+ __u64 creation_time; /* 0x50 - 0x57 Array creation time */
+#define IMSM_FILLERS 32
+ __u32 filler[IMSM_FILLERS]; /* 0x58 - 0xD7 RAID_MPB_FILLERS */
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_super, 264)
#define BBM_LOG_MAX_ENTRIES 254
#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
__u32 signature; /* 0xABADB10C */
__u32 entry_count;
struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
-} __attribute__ ((__packed__));
+};
+ASSERT_SIZE(bbm_log, 2040)
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
__u32 rec_status; /* Status used to determine how to restart
* migration in case it aborts
* in some fashion */
- __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
+ __u32 curr_migr_unit_lo; /* 0..numMigrUnits-1 */
__u32 family_num; /* Family number of MPB
* containing the RaidDev
* that is migrating */
__u32 dest_depth_per_unit; /* Num member blocks each destMap
* member disk
* advances per unit-of-operation */
- __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
- __u32 dest_1st_member_lba; /* First member lba on first
- * stripe of destination */
- __u32 num_migr_units; /* Total num migration units-of-op */
+ __u32 ckpt_area_pba_lo; /* Pba of first block of ckpt copy area */
+ __u32 dest_1st_member_lba_lo; /* First member lba on first
+ * stripe of destination */
+ __u32 num_migr_units_lo; /* Total num migration units-of-op */
__u32 post_migr_vol_cap; /* Size of volume after
* migration completes */
__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
__u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
* migration ckpt record was read from
* (for recovered migrations) */
-} __attribute__ ((__packed__));
+ __u32 curr_migr_unit_hi; /* 0..numMigrUnits-1 high order 32 bits */
+ __u32 ckpt_area_pba_hi; /* Pba of first block of ckpt copy area
+ * high order 32 bits */
+ __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of
+ * destination - high order 32 bits */
+ __u32 num_migr_units_hi; /* Total num migration units-of-op
+ * high order 32 bits */
+};
+ASSERT_SIZE(migr_record, 64)
struct md_list {
/* usage marker:
static __u32 imsm_min_reserved_sectors(struct intel_super *super);
-static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
+static int split_ull(unsigned long long n, void *lo, void *hi)
{
if (lo == 0 || hi == 0)
return 1;
- *lo = __le32_to_cpu((unsigned)n);
- *hi = __le32_to_cpu((unsigned)(n >> 32));
+ __put_unaligned32(__cpu_to_le32((__u32)n), lo);
+ __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
return 0;
}
return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
}
+/* Join dev->size_low and dev->size_high; a NULL dev reads as 0. */
+static unsigned long long imsm_dev_size(struct imsm_dev *dev)
+{
+	return dev ? join_u32(dev->size_low, dev->size_high) : 0;
+}
+
+/* Join the lo/hi halves of ckpt_area_pba; a NULL record reads as 0. */
+static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec)
+{
+	if (migr_rec)
+		return join_u32(migr_rec->ckpt_area_pba_lo,
+				migr_rec->ckpt_area_pba_hi);
+	return 0;
+}
+
+/* Join the lo/hi halves of curr_migr_unit; a NULL record reads as 0. */
+static unsigned long long current_migr_unit(struct migr_record *migr_rec)
+{
+	if (migr_rec)
+		return join_u32(migr_rec->curr_migr_unit_lo,
+				migr_rec->curr_migr_unit_hi);
+	return 0;
+}
+
+/* Join the lo/hi halves of dest_1st_member_lba; a NULL record reads as 0. */
+static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec)
+{
+	if (migr_rec)
+		return join_u32(migr_rec->dest_1st_member_lba_lo,
+				migr_rec->dest_1st_member_lba_hi);
+	return 0;
+}
+
+/* Join the lo/hi halves of num_migr_units; a NULL record reads as 0. */
+static unsigned long long get_num_migr_units(struct migr_record *migr_rec)
+{
+	if (migr_rec)
+		return join_u32(migr_rec->num_migr_units_lo,
+				migr_rec->num_migr_units_hi);
+	return 0;
+}
+
static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
}
-static struct extent *get_extents(struct intel_super *super, struct dl *dl)
+/*
+ * Split n into dev->size_low / dev->size_high via split_ull().
+ * A NULL dev is ignored, matching imsm_dev_size() which reads NULL as 0;
+ * without the guard &dev->size_low would be formed from a NULL pointer.
+ */
+static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
+{
+	if (dev == NULL)
+		return;
+	split_ull(n, &dev->size_low, &dev->size_high);
+}
+
+/*
+ * Split n into migr_rec->ckpt_area_pba_lo / ckpt_area_pba_hi.
+ * A NULL record is ignored, matching migr_chkp_area_pba() which reads
+ * NULL as 0.
+ */
+static void set_migr_chkp_area_pba(struct migr_record *migr_rec,
+				   unsigned long long n)
+{
+	if (migr_rec == NULL)
+		return;
+	split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi);
+}
+
+/*
+ * Split n into migr_rec->curr_migr_unit_lo / curr_migr_unit_hi.
+ * A NULL record is ignored, matching current_migr_unit() which reads
+ * NULL as 0.
+ */
+static void set_current_migr_unit(struct migr_record *migr_rec,
+				  unsigned long long n)
+{
+	if (migr_rec == NULL)
+		return;
+	split_ull(n, &migr_rec->curr_migr_unit_lo,
+		  &migr_rec->curr_migr_unit_hi);
+}
+
+/*
+ * Split n into migr_rec->dest_1st_member_lba_lo / dest_1st_member_lba_hi.
+ * A NULL record is ignored, matching migr_dest_1st_member_lba() which
+ * reads NULL as 0.
+ */
+static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec,
+					 unsigned long long n)
+{
+	if (migr_rec == NULL)
+		return;
+	split_ull(n, &migr_rec->dest_1st_member_lba_lo,
+		  &migr_rec->dest_1st_member_lba_hi);
+}
+
+/*
+ * Split n into migr_rec->num_migr_units_lo / num_migr_units_hi.
+ * A NULL record is ignored, matching get_num_migr_units() which reads
+ * NULL as 0.
+ */
+static void set_num_migr_units(struct migr_record *migr_rec,
+			       unsigned long long n)
+{
+	if (migr_rec == NULL)
+		return;
+	split_ull(n, &migr_rec->num_migr_units_lo,
+		  &migr_rec->num_migr_units_hi);
+}
+
+/*
+ * Compute num_data_stripes(map) * blocks_per_strip, doubled for raid
+ * levels 1 and 10.  Returns 0 when map is NULL.
+ *
+ * blocks_per_strip is converted with __le16_to_cpu(), as the other
+ * readers of this field in this file do, so the result does not depend
+ * on host byte order.
+ */
+static unsigned long long per_dev_array_size(struct imsm_map *map)
+{
+	unsigned long long array_size;
+	int level;
+
+	if (map == NULL)
+		return 0;
+
+	array_size = num_data_stripes(map) *
+		     __le16_to_cpu(map->blocks_per_strip);
+
+	/* look the level up once instead of once per comparison */
+	level = get_imsm_raid_level(map);
+	if (level == 1 || level == 10)
+		array_size *= 2;
+
+	return array_size;
+}
+
+static struct extent *get_extents(struct intel_super *super, struct dl *dl,
+ int get_minimal_reservation)
{
/* find a list of used extents on the given physical device */
struct extent *rv, *e;
* regardless of whether the OROM has assigned sectors from the
* IMSM_RESERVED_SECTORS region
*/
- if (dl->index == -1)
+ if (dl->index == -1 || get_minimal_reservation)
reservation = imsm_min_reserved_sectors(super);
else
reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
if (get_imsm_disk_slot(map, dl->index) >= 0) {
e->start = pba_of_lba0(map);
- e->size = blocks_per_member(map);
+ e->size = per_dev_array_size(map);
e++;
}
}
if (dl->index == -1)
return MPB_SECTOR_CNT;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
return size;
}
+/*
+ * Return non-zero when missing_disks does not exceed the limit for
+ * raid_level: one missing disk for level 10, none for every other level.
+ */
+static int able_to_resync(int raid_level, int missing_disks)
+{
+	int tolerated = (raid_level == 10) ? 1 : 0;
+
+	return missing_disks <= tolerated;
+}
+
/* try to determine how much space is reserved for metadata from
* the last get_extents() entry on the smallest active disk,
* otherwise fallback to the default
return rv;
/* find last lba used by subarrays on the smallest active disk */
- e = get_extents(super, dl_min);
+ e = get_extents(super, dl_min, 0);
if (!e)
return rv;
for (i = 0; e[i].size; i++)
if (!dl)
return -EINVAL;
/* find last lba used by subarrays */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return -EINVAL;
for (i = 0; e[i].size; i++)
printf("\n");
printf("[%.16s]:\n", dev->volume);
+ printf(" Subarray : %d\n", super->current_vol);
printf(" UUID : %s\n", uuid);
printf(" RAID Level : %d", get_imsm_raid_level(map));
if (map2)
} else
printf(" This Slot : ?\n");
printf(" Sector Size : %u\n", super->sector_size);
- sz = __le32_to_cpu(dev->size_high);
- sz <<= 32;
- sz += __le32_to_cpu(dev->size_low);
+ sz = imsm_dev_size(dev);
printf(" Array Size : %llu%s\n",
(unsigned long long)sz * 512 / super->sector_size,
human_size(sz * 512));
printf("Multiple distributed PPLs\n");
else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
printf("Multiple PPLs on journaling drive\n");
+ else if (dev->rwh_policy == RWH_BITMAP)
+ printf("Write-intent bitmap\n");
else
printf("<unknown:%d>\n", dev->rwh_policy);
+
+ printf(" Volume ID : %u\n", dev->my_vol_raid_dev_num);
}
static void print_imsm_disk(struct imsm_disk *disk,
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit /= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba /= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba /= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV);
}
void convert_to_4k_imsm_disk(struct imsm_disk *disk)
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
struct imsm_map *map = get_imsm_map(dev, MAP_0);
/* dev */
- split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV),
- &dev->size_low, &dev->size_high);
+ set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV);
dev->vol.curr_migr_unit /= IMSM_4K_DIV;
/* map0 */
printf("Normal\n");
else
printf("Contains Data\n");
- printf(" Current Unit : %u\n",
- __le32_to_cpu(migr_rec->curr_migr_unit));
+ printf(" Current Unit : %llu\n",
+ current_migr_unit(migr_rec));
printf(" Family : %u\n",
__le32_to_cpu(migr_rec->family_num));
printf(" Ascending : %u\n",
__le32_to_cpu(migr_rec->blocks_per_unit));
printf(" Dest. Depth Per Unit : %u\n",
__le32_to_cpu(migr_rec->dest_depth_per_unit));
- printf(" Checkpoint Area pba : %u\n",
- __le32_to_cpu(migr_rec->ckpt_area_pba));
- printf(" First member lba : %u\n",
- __le32_to_cpu(migr_rec->dest_1st_member_lba));
- printf(" Total Number of Units : %u\n",
- __le32_to_cpu(migr_rec->num_migr_units));
- printf(" Size of volume : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap));
- printf(" Expansion space for LBA64 : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
+ printf(" Checkpoint Area pba : %llu\n",
+ migr_chkp_area_pba(migr_rec));
+ printf(" First member lba : %llu\n",
+ migr_dest_1st_member_lba(migr_rec));
+ printf(" Total Number of Units : %llu\n",
+ get_num_migr_units(migr_rec));
+ printf(" Size of volume : %llu\n",
+ join_u32(migr_rec->post_migr_vol_cap,
+ migr_rec->post_migr_vol_cap_hi));
printf(" Record was read from : %u\n",
__le32_to_cpu(migr_rec->ckpt_read_disk_num));
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit *= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba *= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba *= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap,
&migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV);
}
void convert_from_4k(struct intel_super *super)
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
struct imsm_map *map = get_imsm_map(dev, MAP_0);
/* dev */
- split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV),
- &dev->size_low, &dev->size_high);
+ set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV);
dev->vol.curr_migr_unit *= IMSM_4K_DIV;
/* map0 */
__u32 sum;
__u32 reserved = imsm_reserved_sectors(super, super->disks);
struct dl *dl;
+ time_t creation_time;
strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
str[MPB_SIG_LEN-1] = '\0';
printf(" Magic : %s\n", str);
- snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
printf(" Version : %s\n", get_imsm_version(mpb));
printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
+ creation_time = __le64_to_cpu(mpb->creation_time);
+ printf(" Creation Time : %.24s\n",
+ creation_time ? ctime(&creation_time) : "Unknown");
printf(" Attributes : ");
if (imsm_check_attributes(mpb->attributes))
printf("All supported\n");
printf("MD_LEVEL=container\n");
printf("MD_UUID=%s\n", nbuf+5);
printf("MD_DEVICES=%u\n", mpb->num_disks);
+ printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time));
}
-static int copy_metadata_imsm(struct supertype *st, int from, int to)
-{
- /* The second last sector of the device contains
- * the "struct imsm_super" metadata.
- * This contains mpb_size which is the size in bytes of the
- * extended metadata. This is located immediately before
- * the imsm_super.
- * We want to read all that, plus the last sector which
- * may contain a migration record, and write it all
- * to the target.
- */
- void *buf;
- unsigned long long dsize, offset;
- int sectors;
- struct imsm_super *sb;
- struct intel_super *super = st->sb;
- unsigned int sector_size = super->sector_size;
- unsigned int written = 0;
-
- if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
- return 1;
-
- if (!get_dev_size(from, NULL, &dsize))
- goto err;
-
- if (lseek64(from, dsize-(2*sector_size), 0) < 0)
- goto err;
- if ((unsigned int)read(from, buf, sector_size) != sector_size)
- goto err;
- sb = buf;
- if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
- goto err;
-
- sectors = mpb_sectors(sb, sector_size) + 2;
- offset = dsize - sectors * sector_size;
- if (lseek64(from, offset, 0) < 0 ||
- lseek64(to, offset, 0) < 0)
- goto err;
- while (written < sectors * sector_size) {
- int n = sectors*sector_size - written;
- if (n > 4096)
- n = 4096;
- if (read(from, buf, n) != n)
- goto err;
- if (write(to, buf, n) != n)
- goto err;
- written += n;
- }
- free(buf);
- return 0;
-err:
- free(buf);
- return 1;
-}
-
-static void detail_super_imsm(struct supertype *st, char *homehost)
+static void detail_super_imsm(struct supertype *st, char *homehost,
+ char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("\n UUID : %s\n", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
-static void brief_detail_super_imsm(struct supertype *st)
+static void brief_detail_super_imsm(struct supertype *st, char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
+
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf(" UUID=%s", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
-static int imsm_read_serial(int fd, char *devname, __u8 *serial);
+static int imsm_read_serial(int fd, char *devname, __u8 *serial,
+ size_t serial_buf_len);
static void fd2devname(int fd, char *name);
static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
else {
fd2devname(fd, buf);
printf(" Port%d : %s", port, buf);
- if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
- printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
+ if (imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)) == 0)
+ printf(" (%s)\n", buf);
else
printf(" ()\n");
close(fd);
return err;
}
-static int print_vmd_attached_devs(struct sys_dev *hba)
+static int print_nvme_info(struct sys_dev *hba)
{
+ char buf[1024];
struct dirent *ent;
DIR *dir;
- char path[292];
- char link[256];
- char *c, *rp;
-
- if (hba->type != SYS_DEV_VMD)
- return 1;
+ char *rp;
+ int fd;
- /* scroll through /sys/dev/block looking for devices attached to
- * this hba
- */
- dir = opendir("/sys/bus/pci/drivers/nvme");
+ dir = opendir("/sys/block/");
if (!dir)
return 1;
for (ent = readdir(dir); ent; ent = readdir(dir)) {
- int n;
-
- /* is 'ent' a device? check that the 'subsystem' link exists and
- * that its target matches 'bus'
- */
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem",
- ent->d_name);
- n = readlink(path, link, sizeof(link));
- if (n < 0 || n >= (int)sizeof(link))
- continue;
- link[n] = '\0';
- c = strrchr(link, '/');
- if (!c)
- continue;
- if (strncmp("pci", c+1, strlen("pci")) != 0)
- continue;
-
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
-
- rp = realpath(path, NULL);
- if (!rp)
- continue;
+ if (strstr(ent->d_name, "nvme")) {
+ sprintf(buf, "/sys/block/%s", ent->d_name);
+ rp = realpath(buf, NULL);
+ if (!rp)
+ continue;
+ if (path_attached_to_hba(rp, hba->path)) {
+ fd = open_dev(ent->d_name);
+ if (!imsm_is_nvme_supported(fd, 0)) {
+ if (fd >= 0)
+ close(fd);
+ free(rp);
+ continue;
+ }
- if (path_attached_to_hba(rp, hba->path)) {
- printf(" NVMe under VMD : %s\n", rp);
+ fd2devname(fd, buf);
+ if (hba->type == SYS_DEV_VMD)
+ printf(" NVMe under VMD : %s", buf);
+ else if (hba->type == SYS_DEV_NVME)
+ printf(" NVMe Device : %s", buf);
+ if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)))
+ printf(" (%s)\n", buf);
+ else
+ printf("()\n");
+ close(fd);
+ }
+ free(rp);
}
- free(rp);
}
closedir(dir);
printf(" Platform : Intel(R) ");
if (orom->capabilities == 0 && orom->driver_features == 0)
printf("Matrix Storage Manager\n");
+ else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
+ printf("Virtual RAID on CPU\n");
else
printf("Rapid Storage Technology%s\n",
imsm_orom_is_enterprise(orom) ? " enterprise" : "");
char buf[PATH_MAX];
printf(" I/O Controller : %s (%s)\n",
vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
- if (print_vmd_attached_devs(hba)) {
+ if (print_nvme_info(hba)) {
if (verbose > 0)
pr_err("failed to get devices attached to VMD domain.\n");
result |= 2;
if (entry->type == SYS_DEV_NVME) {
for (hba = list; hba; hba = hba->next) {
if (hba->type == SYS_DEV_NVME)
- printf(" NVMe Device : %s\n", hba->path);
+ print_nvme_info(hba);
}
printf("\n");
continue;
return num_stripes_per_unit_resync(dev);
}
-static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
+static __u8 imsm_num_data_members(struct imsm_map *map)
{
/* named 'imsm_' because raid0, raid1 and raid10
* counter-intuitively have the same number of data disks
*/
- struct imsm_map *map = get_imsm_map(dev, second_map);
-
switch (get_imsm_raid_level(map)) {
case 0:
return map->num_members;
}
}
+/*
+ * Work out the per-member component size for a volume.
+ *
+ * The size derived from the map (per_dev_array_size()) is used unless,
+ * multiplied by the number of data members, it disagrees with the size
+ * stored in dev by more than 2048 blocks per member; in that case the
+ * stored size divided by the number of data members is used instead.
+ * Returns 0 when the map reports no data members.
+ */
+static unsigned long long calc_component_size(struct imsm_map *map,
+					      struct imsm_dev *dev)
+{
+	unsigned long long size_from_meta = imsm_dev_size(dev);
+	unsigned int data_disks = imsm_num_data_members(map);
+	unsigned long long per_member;
+	long long size_from_stripes;
+	long long delta;
+
+	if (data_disks == 0)
+		return 0;
+
+	per_member = per_dev_array_size(map);
+	size_from_stripes = per_member * data_disks;
+
+	/* Component size is rounded to 1MB so difference between size from
+	 * metadata and size calculated from num_data_stripes equals up to
+	 * 2048 blocks per each device. If the difference is higher it means
+	 * that array size was expanded and num_data_stripes was not updated.
+	 */
+	delta = llabs(size_from_stripes - (long long)size_from_meta);
+	if (delta <= (1 << SECT_PER_MB_SHIFT) * data_disks)
+		return per_member;
+
+	per_member = size_from_meta / data_disks;
+	dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
+		per_member / map->blocks_per_strip,
+		num_data_stripes(map));
+
+	return per_member;
+}
+
static __u32 parity_segment_depth(struct imsm_dev *dev)
{
struct imsm_map *map = get_imsm_map(dev, MAP_0);
*/
stripes_per_unit = num_stripes_per_unit_resync(dev);
migr_chunk = migr_strip_blocks_resync(dev);
- disks = imsm_num_data_members(dev, MAP_0);
+ disks = imsm_num_data_members(map);
blocks_per_unit = stripes_per_unit * migr_chunk * disks;
stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
segment = blocks_per_unit / stripe;
* sector of disk)
* Parameters:
* super : imsm internal array info
- * info : general array info
* Returns:
* 0 : success
* -1 : fail
* -2 : no migration in progress
******************************************************************************/
-static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
+static int load_imsm_migr_rec(struct intel_super *super)
{
- struct mdinfo *sd;
struct dl *dl;
char nm[30];
int retval = -1;
struct imsm_dev *dev;
struct imsm_map *map;
int slot = -1;
+ int keep_fd = 1;
/* find map under migration */
dev = imsm_get_device_during_migration(super);
if (dev == NULL)
return -2;
- if (info) {
- for (sd = info->devs ; sd ; sd = sd->next) {
- /* read only from one of the first two slots */
- if ((sd->disk.raid_disk < 0) ||
- (sd->disk.raid_disk > 1))
- continue;
+ map = get_imsm_map(dev, MAP_0);
+ if (!map)
+ return -1;
- sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- fd = dev_open(nm, O_RDONLY);
- if (fd >= 0)
- break;
- }
- }
- if (fd < 0) {
- map = get_imsm_map(dev, MAP_0);
- for (dl = super->disks; dl; dl = dl->next) {
- /* skip spare and failed disks
- */
- if (dl->index < 0)
- continue;
- /* read only from one of the first two slots */
- if (map)
- slot = get_imsm_disk_slot(map, dl->index);
- if (map == NULL || slot > 1 || slot < 0)
- continue;
+ for (dl = super->disks; dl; dl = dl->next) {
+ /* skip spare and failed disks
+ */
+ if (dl->index < 0)
+ continue;
+ /* read only from one of the first two slots
+ */
+ slot = get_imsm_disk_slot(map, dl->index);
+ if (slot > 1 || slot < 0)
+ continue;
+
+ if (dl->fd < 0) {
sprintf(nm, "%d:%d", dl->major, dl->minor);
fd = dev_open(nm, O_RDONLY);
- if (fd >= 0)
+ if (fd >= 0) {
+ keep_fd = 0;
break;
+ }
+ } else {
+ fd = dl->fd;
+ break;
}
}
+
if (fd < 0)
- goto out;
+ return retval;
retval = read_imsm_migr_rec(fd, super);
-
-out:
- if (fd >= 0)
+ if (!keep_fd)
close(fd);
+
return retval;
}
return 0;
}
(*u)->type = update_general_migration_checkpoint;
- (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
+ (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
dprintf("prepared for %u\n", (*u)->curr_migr_unit);
return update_memory_size;
struct intel_super *super = st->sb;
unsigned int sector_size = super->sector_size;
unsigned long long dsize;
- char nm[30];
- int fd = -1;
int retval = -1;
struct dl *sd;
int len;
if (map == NULL || slot > 1 || slot < 0)
continue;
- sprintf(nm, "%d:%d", sd->major, sd->minor);
- fd = dev_open(nm, O_RDWR);
- if (fd < 0)
- continue;
- get_dev_size(fd, NULL, &dsize);
- if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
+ get_dev_size(sd->fd, NULL, &dsize);
+ if (lseek64(sd->fd, dsize - (MIGR_REC_SECTOR_POSITION *
+ sector_size),
SEEK_SET) < 0) {
pr_err("Cannot seek to anchor block: %s\n",
strerror(errno));
goto out;
}
- if ((unsigned int)write(fd, super->migr_rec_buf,
+ if ((unsigned int)write(sd->fd, super->migr_rec_buf,
MIGR_REC_BUF_SECTORS*sector_size) !=
MIGR_REC_BUF_SECTORS*sector_size) {
pr_err("Cannot write migr record block: %s\n",
strerror(errno));
goto out;
}
- close(fd);
- fd = -1;
}
if (sector_size == 4096)
convert_from_4k_imsm_migr_rec(super);
retval = 0;
out:
- if (fd >= 0)
- close(fd);
return retval;
}
}
return rv;
}
-static unsigned long long imsm_component_size_aligment_check(int level,
+static unsigned long long imsm_component_size_alignment_check(int level,
int chunk_size,
unsigned int sector_size,
unsigned long long component_size)
{
- unsigned int component_size_alligment;
+ unsigned int component_size_alignment;
- /* check component size aligment
+ /* check component size alignment
*/
- component_size_alligment = component_size % (chunk_size/sector_size);
+ component_size_alignment = component_size % (chunk_size/sector_size);
- dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n",
+ dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alignment = %u\n",
level, chunk_size, component_size,
- component_size_alligment);
+ component_size_alignment);
- if (component_size_alligment && (level != 1) && (level != UnSet)) {
- dprintf("imsm: reported component size alligned from %llu ",
+ if (component_size_alignment && (level != 1) && (level != UnSet)) {
+ dprintf("imsm: reported component size aligned from %llu ",
component_size);
- component_size -= component_size_alligment;
+ component_size -= component_size_alignment;
dprintf_cont("to %llu (%i).\n",
- component_size, component_size_alligment);
+ component_size, component_size_alignment);
}
return component_size;
}
+/*******************************************************************************
+ * Function: get_bitmap_header_sector
+ * Description: Returns the sector where the bitmap header is placed.
+ * Parameters:
+ * super : imsm internal array info
+ * dev_idx : index of the device with bitmap
+ *
+ * Returns:
+ * The sector where the bitmap header is placed, or 0 when
+ * super->sector_size is not set
+ ******************************************************************************/
+static unsigned long long get_bitmap_header_sector(struct intel_super *super,
+ int dev_idx)
+{
+ struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+ if (!super->sector_size) {
+ dprintf("sector size is not set\n");
+ return 0;
+ }
+
+ /* pba_of_lba0 + component size, plus the reserved header offset
+ * converted from bytes to sectors
+ */
+ return pba_of_lba0(map) + calc_component_size(map, dev) +
+ (IMSM_BITMAP_HEADER_OFFSET / super->sector_size);
+}
+
+/*******************************************************************************
+ * Function: get_bitmap_sector
+ * Description: Returns the sector where the bitmap is placed.
+ * Parameters:
+ * super : imsm internal array info
+ * dev_idx : index of the device with bitmap
+ *
+ * Returns:
+ * The sector where the bitmap is placed, or 0 when
+ * super->sector_size is not set
+ ******************************************************************************/
+static unsigned long long get_bitmap_sector(struct intel_super *super,
+ int dev_idx)
+{
+ if (!super->sector_size) {
+ dprintf("sector size is not set\n");
+ return 0;
+ }
+
+ /* the bitmap follows the header: header sector plus the header size
+ * converted from bytes to sectors
+ */
+ return get_bitmap_header_sector(super, dev_idx) +
+ (IMSM_BITMAP_HEADER_SIZE / super->sector_size);
+}
+
static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
{
struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
info->array.chunk_size =
__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY);
- info->custom_array_size = __le32_to_cpu(dev->size_high);
- info->custom_array_size <<= 32;
- info->custom_array_size |= __le32_to_cpu(dev->size_low);
+ info->custom_array_size = imsm_dev_size(dev);
info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
if (is_gen_migration(dev)) {
}
info->data_offset = pba_of_lba0(map_to_analyse);
-
- if (info->array.level == 5) {
- info->component_size = num_data_stripes(map_to_analyse) *
- map_to_analyse->blocks_per_strip;
- } else {
- info->component_size = blocks_per_member(map_to_analyse);
- }
-
- info->component_size = imsm_component_size_aligment_check(
+ info->component_size = calc_component_size(map, dev);
+ info->component_size = imsm_component_size_alignment_check(
info->array.level,
info->array.chunk_size,
super->sector_size,
} else if (info->array.level <= 0) {
info->consistency_policy = CONSISTENCY_POLICY_NONE;
} else {
- info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ if (dev->rwh_policy == RWH_BITMAP) {
+ info->bitmap_offset = get_bitmap_sector(super, super->current_vol);
+ info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
}
info->reshape_progress = 0;
case MIGR_GEN_MIGR: {
__u64 blocks_per_unit = blocks_per_migr_unit(super,
dev);
- __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
- unsigned long long array_blocks;
+ __u64 units = current_migr_unit(migr_rec);
int used_disks;
if (__le32_to_cpu(migr_rec->ascending_migr) &&
(units <
- (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
+ (get_num_migr_units(migr_rec)-1)) &&
(super->migr_rec->rec_status ==
__cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
units++;
(unsigned long long)blocks_per_unit,
info->reshape_progress);
- used_disks = imsm_num_data_members(dev, MAP_1);
+ used_disks = imsm_num_data_members(prev_map);
if (used_disks > 0) {
- array_blocks = blocks_per_member(map) *
+ info->custom_array_size = per_dev_array_size(map) *
used_disks;
- info->custom_array_size =
- round_size_to_mb(array_blocks,
- used_disks);
-
}
}
case MIGR_VERIFY:
__u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
__u32 idx = ord_to_idx(ord);
+ if (super->disks && super->disks->index == (int)idx)
+ info->disk.raid_disk = j;
+
if (!(ord & IMSM_ORD_REBUILD) &&
get_imsm_missing(super, idx)) {
missing = 1;
memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
-static int compare_super_imsm(struct supertype *st, struct supertype *tst)
+static int compare_super_imsm(struct supertype *st, struct supertype *tst,
+ int verbose)
{
/*
* return:
*/
if (!check_env("IMSM_NO_PLATFORM") && first->hba && sec->hba) {
if (first->hba->type != sec->hba->type) {
- fprintf(stderr,
- "HBAs of devices do not match %s != %s\n",
- get_sys_dev_type(first->hba->type),
- get_sys_dev_type(sec->hba->type));
+ if (verbose)
+ pr_err("HBAs of devices do not match %s != %s\n",
+ get_sys_dev_type(first->hba->type),
+ get_sys_dev_type(sec->hba->type));
return 3;
}
+
if (first->orom != sec->orom) {
- fprintf(stderr,
- "HBAs of devices do not match %s != %s\n",
- first->hba->pci_id, sec->hba->pci_id);
+ if (verbose)
+ pr_err("HBAs of devices do not match %s != %s\n",
+ first->hba->pci_id, sec->hba->pci_id);
return 3;
}
+
}
/* if an anchor does not have num_raid_devs set then it is a free
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
static int imsm_read_serial(int fd, char *devname,
- __u8 serial[MAX_RAID_SERIAL_LEN])
+ __u8 *serial, size_t serial_buf_len)
{
char buf[50];
int rv;
- int len;
+ size_t len;
char *dest;
char *src;
unsigned int i;
len = dest - buf;
dest = buf;
- /* truncate leading characters */
- if (len > MAX_RAID_SERIAL_LEN) {
- dest += len - MAX_RAID_SERIAL_LEN;
- len = MAX_RAID_SERIAL_LEN;
+ if (len > serial_buf_len) {
+ /* truncate leading characters */
+ dest += len - serial_buf_len;
+ len = serial_buf_len;
}
- memset(serial, 0, MAX_RAID_SERIAL_LEN);
+ memset(serial, 0, serial_buf_len);
memcpy(serial, dest, len);
return 0;
char name[40];
__u8 serial[MAX_RAID_SERIAL_LEN];
- rv = imsm_read_serial(fd, devname, serial);
+ rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN);
if (rv != 0)
return 2;
struct sys_dev *hba_name;
int rv = 0;
+ if (fd >= 0 && test_partition(fd)) {
+ pr_err("imsm: %s is a partition, cannot be used in IMSM\n",
+ devname);
+ return 1;
+ }
if (fd < 0 || check_env("IMSM_NO_PLATFORM")) {
super->orom = NULL;
super->hba = NULL;
hba = hba->next;
}
fprintf(stderr, ").\n"
- " Mixing devices attached to different %s is not allowed.\n",
- hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers");
+ " Mixing devices attached to different controllers is not allowed.\n");
}
return 2;
}
}
/* load migration record */
- err = load_imsm_migr_rec(super, NULL);
+ err = load_imsm_migr_rec(super);
if (err == -1) {
/* migration is in progress,
* but migr_rec cannot be loaded,
}
/* load migration record */
- if (load_imsm_migr_rec(super, NULL) == 0) {
+ if (load_imsm_migr_rec(super) == 0) {
/* Check for unsupported migration features */
if (check_mpb_migr_compatibility(super) != 0) {
pr_err("Unsupported migration detected");
{
struct imsm_super *mpb = super->anchor;
char *reason = NULL;
+ char *start = name;
+ size_t len = strlen(name);
int i;
- if (strlen(name) > MAX_RAID_SERIAL_LEN)
+ if (len > 0) {
+ while (isspace(start[len - 1]))
+ start[--len] = 0;
+ while (*start && isspace(*start))
+ ++start, --len;
+ memmove(name, start, len + 1);
+ }
+
+ if (len > MAX_RAID_SERIAL_LEN)
reason = "must be 16 characters or less";
+ else if (len == 0)
+ reason = "must be a non-empty string";
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
struct imsm_map *map;
int idx = mpb->num_raid_devs;
int i;
+ int namelen;
unsigned long long array_blocks;
size_t size_old, size_new;
unsigned long long num_data_stripes;
return 0;
dv = xmalloc(sizeof(*dv));
dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
- strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
+ /*
+ * Explicitly allow truncating to not confuse gcc's
+ * -Werror=stringop-truncation
+ */
+ namelen = min((int) strlen(name), MAX_RAID_SERIAL_LEN);
+ memcpy(dev->volume, name, namelen);
array_blocks = calc_array_size(info->level, info->raid_disks,
info->layout, info->chunk_size,
s->size * BLOCKS_PER_KB);
array_blocks = round_size_to_mb(array_blocks, data_disks);
size_per_member = array_blocks / data_disks;
- dev->size_low = __cpu_to_le32((__u32) array_blocks);
- dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
+ set_imsm_dev_size(dev, array_blocks);
dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
vol = &dev->vol;
vol->migr_state = 0;
vol->curr_migr_unit = 0;
map = get_imsm_map(dev, MAP_0);
set_pba_of_lba0(map, super->create_offset);
- set_blocks_per_member(map, info_to_blocks_per_member(info,
- size_per_member /
- BLOCKS_PER_KB));
map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
map->failed_disk_num = ~0;
if (info->level > 0)
num_data_stripes /= map->num_domains;
set_num_data_stripes(map, num_data_stripes);
+ size_per_member += NUM_BLOCKS_DIRTY_STRIPE_REGION;
+ set_blocks_per_member(map, info_to_blocks_per_member(info,
+ size_per_member /
+ BLOCKS_PER_KB));
+
map->num_members = info->raid_disks;
for (i = 0; i < map->num_members; i++) {
/* initialized in add_to_super */
return 1;
}
+ if (mpb->num_disks == 0)
+ if (!get_dev_sector_size(dl->fd, dl->devname,
+ &super->sector_size))
+ return 1;
+
if (!drive_validate_sector_size(super, dl)) {
pr_err("Combining drives of different sector size in one volume is not allowed\n");
return 1;
sum += __gen_imsm_checksum(mpb);
mpb->family_num = __cpu_to_le32(sum);
mpb->orig_family_num = mpb->family_num;
+ mpb->creation_time = __cpu_to_le64((__u64)time(NULL));
}
super->current_disk = dl;
return 0;
return ret_val;
ret_val = 0;
- if (!imsm_read_serial(disk->fd, NULL, serial)) {
+ if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) {
/* Restore disk serial number, because takeover marks disk
* as failed and adds to serial ':0' before it becomes
* a spare disk.
return ret_val;
}
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d);
+
static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname,
unsigned long long data_offset)
dd->fd = fd;
dd->e = NULL;
dd->action = DISK_ADD;
- rv = imsm_read_serial(fd, devname, dd->serial);
+ rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN);
if (rv) {
pr_err("failed to retrieve scsi serial, aborting\n");
if (dd->devname)
snprintf(controller_path, PATH_MAX-1, "%s/device", devpath);
free(devpath);
+ if (!imsm_is_nvme_supported(dd->fd, 1)) {
+ if (dd->devname)
+ free(dd->devname);
+ free(dd);
+ return 1;
+ }
+
if (devpath_to_vendor(controller_path) == 0x8086) {
/*
* If Intel's NVMe drive has serial ended with
} else if (super->hba->type == SYS_DEV_VMD && super->orom &&
!imsm_orom_has_tpv_support(super->orom)) {
pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
- "\tPlease refer to Intel(R) RSTe user guide.\n");
+ "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
free(dd->devname);
free(dd);
return 1;
dd->next = super->disk_mgmt_list;
super->disk_mgmt_list = dd;
} else {
+ /* this is called outside of mdmon
+ * write initial spare metadata
+ * mdmon will overwrite it.
+ */
dd->next = super->disks;
super->disks = dd;
- super->updates_pending++;
+ write_super_imsm_spare(super, dd);
}
return 0;
struct imsm_super anchor;
} spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
-/* spare records have their own family number and do not have any defined raid
- * devices
- */
-static int write_super_imsm_spares(struct intel_super *super, int doclose)
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d)
{
struct imsm_super *mpb = super->anchor;
struct imsm_super *spare = &spare_record.anchor;
__u32 sum;
- struct dl *d;
+
+ if (d->index != -1)
+ return 1;
spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
spare->generation_num = __cpu_to_le32(1UL);
snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
MPB_SIGNATURE MPB_VERSION_RAID0);
- for (d = super->disks; d; d = d->next) {
- if (d->index != -1)
- continue;
-
- spare->disk[0] = d->disk;
- if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
- spare->attributes |= MPB_ATTRIB_2TB_DISK;
+ spare->disk[0] = d->disk;
+ if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
+ spare->attributes |= MPB_ATTRIB_2TB_DISK;
- if (super->sector_size == 4096)
- convert_to_4k_imsm_disk(&spare->disk[0]);
+ if (super->sector_size == 4096)
+ convert_to_4k_imsm_disk(&spare->disk[0]);
- sum = __gen_imsm_checksum(spare);
- spare->family_num = __cpu_to_le32(sum);
- spare->orig_family_num = 0;
- sum = __gen_imsm_checksum(spare);
- spare->check_sum = __cpu_to_le32(sum);
+ sum = __gen_imsm_checksum(spare);
+ spare->family_num = __cpu_to_le32(sum);
+ spare->orig_family_num = 0;
+ sum = __gen_imsm_checksum(spare);
+ spare->check_sum = __cpu_to_le32(sum);
- if (store_imsm_mpb(d->fd, spare)) {
- pr_err("failed for device %d:%d %s\n",
- d->major, d->minor, strerror(errno));
- return 1;
- }
- if (doclose) {
- close(d->fd);
- d->fd = -1;
- }
+ if (store_imsm_mpb(d->fd, spare)) {
+ pr_err("failed for device %d:%d %s\n",
+ d->major, d->minor, strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+/* spare records have their own family number and do not have any defined raid
+ * devices
+ */
+static int write_super_imsm_spares(struct intel_super *super, int doclose)
+{
+ struct dl *d;
+
+ for (d = super->disks; d; d = d->next) {
+ if (d->index != -1)
+ continue;
+
+ if (write_super_imsm_spare(super, d))
+ return 1;
+
+ if (doclose) {
+ close(d->fd);
+ d->fd = -1;
+ }
}
return 0;
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+/*
+ * Store the checksum of the PPL header held in buf and write the header to
+ * fd at ppl_sector (the sector number is multiplied by 512 to get the byte
+ * offset).
+ *
+ * buf must hold PPL_HEADER_SIZE bytes.
+ * NOTE(review): the checksum is computed over the whole buffer, checksum
+ * field included - callers presumably pass that field zeroed; confirm.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int write_ppl_header(unsigned long long ppl_sector, int fd, void *buf)
+{
+	struct ppl_header *ppl_hdr = buf;
+	ssize_t written;
+	int ret;
+
+	ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+	if (lseek64(fd, ppl_sector * 512, SEEK_SET) < 0) {
+		/* save errno before perror() has a chance to clobber it */
+		ret = -errno;
+		perror("Failed to seek to PPL header location");
+		return ret;
+	}
+
+	written = write(fd, buf, PPL_HEADER_SIZE);
+	if (written < 0) {
+		ret = -errno;
+		perror("Write PPL header failed");
+		return ret;
+	}
+	if (written != PPL_HEADER_SIZE) {
+		/*
+		 * A short write does not set errno, so -errno could be 0 and
+		 * the failure would be reported as success.
+		 */
+		pr_err("Write PPL header failed: short write\n");
+		return -EIO;
+	}
+
+	fsync(fd);
+
+	return 0;
+}
+
static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
{
struct intel_super *super = st->sb;
struct ppl_header *ppl_hdr;
int ret;
- ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ /* first clear entire ppl space */
+ ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+ if (ret)
+ return ret;
+
+ ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
if (ret) {
pr_err("Failed to allocate PPL header buffer\n");
- return ret;
+ return -ret;
}
memset(buf, 0, PPL_HEADER_SIZE);
ppl_hdr = buf;
memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
- ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
-
- if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
- ret = errno;
- perror("Failed to seek to PPL header location");
- }
- if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
- ret = errno;
- perror("Write PPL header failed");
+ if (info->mismatch_cnt) {
+ /*
+ * We are overwriting an invalid ppl. Make one entry with wrong
+ * checksum to prevent the kernel from skipping resync.
+ */
+ ppl_hdr->entries_count = __cpu_to_le32(1);
+ ppl_hdr->entries[0].checksum = ~0;
}
- if (!ret)
- fsync(fd);
+ ret = write_ppl_header(info->ppl_sector, fd, buf);
free(buf);
return ret;
}
+static int is_rebuilding(struct imsm_dev *dev);
+
static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
struct mdinfo *disk)
{
struct intel_super *super = st->sb;
struct dl *d;
- void *buf;
+ void *buf_orig, *buf, *buf_prev = NULL;
int ret = 0;
- struct ppl_header *ppl_hdr;
+ struct ppl_header *ppl_hdr = NULL;
__u32 crc;
struct imsm_dev *dev;
- struct imsm_map *map;
__u32 idx;
unsigned int i;
unsigned long long ppl_offset = 0;
if (disk->disk.raid_disk < 0)
return 0;
- if (posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE)) {
- pr_err("Failed to allocate PPL header buffer\n");
- return -1;
- }
-
dev = get_imsm_dev(super, info->container_member);
- map = get_imsm_map(dev, MAP_X);
- idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
+ idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0);
d = get_imsm_dl_disk(super, idx);
if (!d || d->index < 0 || is_failed(&d->disk))
- goto out;
+ return 0;
+
+ if (posix_memalign(&buf_orig, MAX_SECTOR_SIZE, PPL_HEADER_SIZE * 2)) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return -1;
+ }
+ buf = buf_orig;
ret = 1;
while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
+ void *tmp;
+
dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);
if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
SEEK_SET) < 0) {
perror("Failed to seek to PPL header location");
ret = -1;
- goto out;
+ break;
}
if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
perror("Read PPL header failed");
ret = -1;
- goto out;
+ break;
}
ppl_hdr = buf;
if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
dprintf("Wrong PPL header checksum on %s\n",
d->devname);
- goto out;
+ break;
}
if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
/* previous was newest, it was already checked */
- goto out;
+ break;
}
if ((__le32_to_cpu(ppl_hdr->signature) !=
dprintf("Wrong PPL header signature on %s\n",
d->devname);
ret = 1;
- goto out;
+ break;
}
ret = 0;
for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
ppl_offset +=
__le32_to_cpu(ppl_hdr->entries[i].pp_size);
+
+ if (!buf_prev)
+ buf_prev = buf + PPL_HEADER_SIZE;
+ tmp = buf_prev;
+ buf_prev = buf;
+ buf = tmp;
}
-out:
- free(buf);
+ if (buf_prev) {
+ buf = buf_prev;
+ ppl_hdr = buf_prev;
+ }
+
+ /*
+ * Update metadata to use multiple PPLs area (1MB).
+ * This is done once for all RAID members
+ */
+ if (info->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) {
+ char subarray[20];
+ struct mdinfo *member_dev;
+
+ sprintf(subarray, "%d", info->container_member);
+
+ if (mdmon_running(st->container_devnm))
+ st->update_tail = &st->updates;
+
+ if (st->ss->update_subarray(st, subarray, "ppl", NULL)) {
+ pr_err("Failed to update subarray %s\n",
+ subarray);
+ } else {
+ if (st->update_tail)
+ flush_metadata_updates(st);
+ else
+ st->ss->sync_metadata(st);
+ info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ for (member_dev = info->devs; member_dev;
+ member_dev = member_dev->next)
+ member_dev->ppl_size =
+ (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ }
+ }
+
+ if (ret == 1) {
+ struct imsm_map *map = get_imsm_map(dev, MAP_X);
+
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ (map->map_state == IMSM_T_STATE_NORMAL &&
+ !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
+ (is_rebuilding(dev) &&
+ dev->vol.curr_migr_unit == 0 &&
+ get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
+ ret = st->ss->write_init_ppl(st, info, d->fd);
+ else
+ info->mismatch_cnt++;
+ } else if (ret == 0 &&
+ ppl_hdr->entries_count == 0 &&
+ is_rebuilding(dev) &&
+ info->resync_start == 0) {
+ /*
+ * The header has no entries - add a single empty entry and
+ * rewrite the header to prevent the kernel from going into
+ * resync after an interrupted rebuild.
+ */
+ ppl_hdr->entries_count = __cpu_to_le32(1);
+ ret = write_ppl_header(info->ppl_sector, d->fd, buf);
+ }
- if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
- return st->ss->write_init_ppl(st, info, d->fd);
+ free(buf_orig);
return ret;
}
return ret;
}
+/*******************************************************************************
+ * Function:	write_init_bitmap_imsm_vol
+ * Description:	Calls the metadata handler's write_bitmap hook for every disk
+ *		of the container that has a non-negative index and is not
+ *		failed, with vol_idx selected as the current volume.
+ * Parameters:
+ *	st	: supertype information
+ *	vol_idx	: the volume index to use
+ *
+ * Returns:
+ *	 0	: success
+ *	 other	: value returned by the first failing write_bitmap call
+ ******************************************************************************/
+static int write_init_bitmap_imsm_vol(struct supertype *st, int vol_idx)
+{
+	struct intel_super *super = st->sb;
+	const int saved_vol = super->current_vol;
+	struct dl *disk = super->disks;
+	int rv = 0;
+
+	/* select vol_idx for the write_bitmap calls; restored below */
+	super->current_vol = vol_idx;
+	while (disk && !rv) {
+		/* disks with a negative index and failed disks are skipped */
+		if (disk->index >= 0 && !is_failed(&disk->disk))
+			rv = st->ss->write_bitmap(st, disk->fd, NoUpdate);
+		disk = disk->next;
+	}
+	super->current_vol = saved_vol;
+
+	return rv;
+}
+
+/*******************************************************************************
+ * Function:	write_init_bitmap_imsm_all
+ * Description:	Writes the initial bitmap for a volume through
+ *		write_init_bitmap_imsm_vol(). The operation is executed only
+ *		for volumes with CONSISTENCY_POLICY_BITMAP; for any other
+ *		policy, or when info is NULL, nothing is written.
+ * Parameters:
+ *	st	: supertype information
+ *	info	: info about the volume where the bitmap should be written
+ *	vol_idx	: the volume index to use
+ *
+ * Returns:
+ *	 0	: success, or nothing to do
+ *	 other	: value returned by write_init_bitmap_imsm_vol()
+ ******************************************************************************/
+static int write_init_bitmap_imsm_all(struct supertype *st, struct mdinfo *info,
+				      int vol_idx)
+{
+	/* nothing to do unless the volume uses the bitmap consistency policy */
+	if (!info || info->consistency_policy != CONSISTENCY_POLICY_BITMAP)
+		return 0;
+
+	return write_init_bitmap_imsm_vol(st, vol_idx);
+}
+
static int write_init_super_imsm(struct supertype *st)
{
struct intel_super *super = st->sb;
*/
rv = mgmt_disk(st);
} else {
+ /* adding the second volume to the array */
rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = write_init_bitmap_imsm_all(st, &info, current_vol);
if (!rv)
rv = create_array(st, current_vol);
}
struct dl *d;
for (d = super->disks; d; d = d->next)
Kill(d->devname, NULL, 0, -1, 1);
- if (current_vol >= 0)
+ if (current_vol >= 0) {
rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = write_init_bitmap_imsm_all(st, &info, current_vol);
+ }
+
if (!rv)
rv = write_super_imsm(st, 1);
}
if (st->ss != tst->ss ||
st->minor_version != tst->minor_version ||
- st->ss->compare_super(st, tst) != 0) {
+ st->ss->compare_super(st, tst, 1) != 0) {
/* Some mismatch. If exactly one array matches this host,
* we can resolve on that one.
* Or, if we are auto assembling, we just ignore the second
mpb = super->anchor;
if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) {
- pr_err("RAID gemetry validation failed. Cannot proceed with the action(s).\n");
+ pr_err("RAID geometry validation failed. Cannot proceed with the action(s).\n");
return 0;
}
if (!dev) {
pos = 0;
i = 0;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e) continue;
do {
unsigned long long esize;
}
/* retrieve the largest free space block */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
maxsize = 0;
i = 0;
if (e) {
maxsize = merge_extents(super, i);
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
if (maxsize < size || maxsize == 0) {
if (verbose) {
if (super->orom && dl->index < 0 && mpb->num_raid_devs)
continue;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
continue;
for (i = 1; e[i-1].size; i++)
}
maxsize = size;
}
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
cnt = 0;
for (dl = super->disks; dl; dl = dl->next)
if (dl->e)
verbose);
}
+ /*
+ * Size is given in sectors.
+ */
+ if (size && (size < 2048)) {
+ pr_err("Given size must be greater than 1M.\n");
+ /* Depends on algorithm in Create.c :
+ * if block device was given (dev != NULL) return -1,
+ * if container was given (dev == NULL) return 0.
+ */
+ return dev ? -1 : 0;
+ }
+
if (!dev) {
if (st->sb) {
struct intel_super *super = st->sb;
static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
-static int kill_subarray_imsm(struct supertype *st)
+static int kill_subarray_imsm(struct supertype *st, char *subarray_id)
{
- /* remove the subarray currently referenced by ->current_vol */
+ /* remove the subarray currently referenced by subarray_id */
__u8 i;
struct intel_dev **dp;
struct intel_super *super = st->sb;
- __u8 current_vol = super->current_vol;
+ __u8 current_vol = strtoul(subarray_id, NULL, 10);
struct imsm_super *mpb = super->anchor;
- if (super->current_vol < 0)
+ if (mpb->num_raid_devs == 0)
return 2;
- super->current_vol = -1; /* invalidate subarray cursor */
/* block deletions that would change the uuid of active subarrays
*
append_metadata_update(st, u, sizeof(*u));
} else {
struct imsm_dev *dev;
- int i;
+ int i, namelen;
dev = get_imsm_dev(super, vol);
- strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
- dev->volume[MAX_RAID_SERIAL_LEN-1] = '\0';
+ memset(dev->volume, '\0', MAX_RAID_SERIAL_LEN);
+ namelen = min((int)strlen(name), MAX_RAID_SERIAL_LEN);
+ memcpy(dev->volume, name, namelen);
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
handle_missing(super, dev);
int sb_errors = 0;
struct dl *d;
int spare_disks = 0;
+ int current_vol = super->current_vol;
/* do not assemble arrays when not all attributes are supported */
if (imsm_check_attributes(mpb->attributes) == 0) {
int slot;
int chunk;
char *ep;
+ int level;
if (subarray &&
(i != strtoul(subarray, &ep, 10) || *ep != '\0'))
dev = get_imsm_dev(super, i);
map = get_imsm_map(dev, MAP_0);
map2 = get_imsm_map(dev, MAP_1);
+ level = get_imsm_raid_level(map);
/* do not publish arrays that are in the middle of an
* unsupported migration
chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
/* mdadm does not support all metadata features- set the bit in all arrays state */
if (!validate_geometry_imsm_orom(super,
- get_imsm_raid_level(map), /* RAID level */
- imsm_level_to_layout(get_imsm_raid_level(map)),
+ level, /* RAID level */
+ imsm_level_to_layout(level),
map->num_members, /* raid disks */
- &chunk, join_u32(dev->size_low, dev->size_high),
+ &chunk, imsm_dev_size(dev),
1 /* verbose */)) {
pr_err("IMSM RAID geometry validation failed. Array %s activation is blocked.\n",
dev->volume);
int idx;
int skip;
__u32 ord;
+ int missing = 0;
skip = 0;
idx = get_imsm_disk_idx(dev, slot, MAP_0);
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
- if (ord & IMSM_ORD_REBUILD)
+ if (!skip && (ord & IMSM_ORD_REBUILD))
recovery_start = 0;
-
+ if (!(ord & IMSM_ORD_REBUILD))
+ this->array.working_disks++;
/*
* if we skip some disks the array will be assmebled degraded;
* reset resync start to avoid a dirty-degraded
* situation when performing the intial sync
- *
- * FIXME handle dirty degraded
*/
- if ((skip || recovery_start == 0) &&
- !(dev->vol.dirty & RAIDVOL_DIRTY))
- this->resync_start = MaxSector;
+ if (skip)
+ missing++;
+
+ if (!(dev->vol.dirty & RAIDVOL_DIRTY)) {
+ if ((!able_to_resync(level, missing) ||
+ recovery_start == 0))
+ this->resync_start = MaxSector;
+ } else {
+ /*
+ * FIXME handle dirty degraded
+ */
+ }
+
if (skip)
continue;
else
this->array.spare_disks++;
}
- if (info_d->recovery_start == MaxSector)
- this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = pba_of_lba0(map);
+ info_d->component_size = calc_component_size(map, dev);
if (map->raid_level == 5) {
- info_d->component_size =
- num_data_stripes(map) *
- map->blocks_per_strip;
info_d->ppl_sector = this->ppl_sector;
info_d->ppl_size = this->ppl_size;
- } else {
- info_d->component_size = blocks_per_member(map);
+ if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ recovery_start == 0)
+ this->resync_start = 0;
}
info_d->bb.supported = 1;
rest = this;
}
+ super->current_vol = current_vol;
return rest;
}
strcat(buf, ":0");
if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN)
shift = len - MAX_RAID_SERIAL_LEN + 1;
- strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN);
+ memcpy(disk->serial, &buf[shift], len + 1 - shift);
disk->status |= FAILED_DISK;
set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
set_imsm_ord_tbl_ent(map2, slot2,
idx | IMSM_ORD_REBUILD);
}
- if (map->failed_disk_num == 0xff)
+ if (map->failed_disk_num == 0xff ||
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
map->failed_disk_num = slot;
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
long long new_size)
{
- int used_disks = imsm_num_data_members(dev, MAP_0);
unsigned long long array_blocks;
- struct imsm_map *map;
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int used_disks = imsm_num_data_members(map);
if (used_disks == 0) {
/* when problems occures
* return current array_blocks value
*/
- array_blocks = __le32_to_cpu(dev->size_high);
- array_blocks = array_blocks << 32;
- array_blocks += __le32_to_cpu(dev->size_low);
+ array_blocks = imsm_dev_size(dev);
return array_blocks;
}
/* set array size in metadata
*/
- if (new_size <= 0) {
+ if (new_size <= 0)
/* OLCE size change is caused by added disks
*/
- map = get_imsm_map(dev, MAP_0);
- array_blocks = blocks_per_member(map) * used_disks;
- } else {
+ array_blocks = per_dev_array_size(map) * used_disks;
+ else
/* Online Volume Size Change
* Using available free space
*/
array_blocks = new_size;
- }
array_blocks = round_size_to_mb(array_blocks, used_disks);
- dev->size_low = __cpu_to_le32((__u32)array_blocks);
- dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
+ set_imsm_dev_size(dev, array_blocks);
return array_blocks;
}
int used_disks;
struct mdinfo *mdi;
- used_disks = imsm_num_data_members(dev, MAP_0);
+ used_disks = imsm_num_data_members(map);
if (used_disks > 0) {
array_blocks =
- blocks_per_member(map) *
+ per_dev_array_size(map) *
used_disks;
array_blocks =
round_size_to_mb(array_blocks,
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
- if (state & DS_FAULTY) {
+ if (disk && (state & DS_FAULTY)) {
if (mark_failure(super, dev, disk, ord_to_idx(ord)))
super->updates_pending++;
}
break;
}
end_migration(dev, super, map_state);
- map = get_imsm_map(dev, MAP_0);
map->failed_disk_num = ~0;
super->updates_pending++;
a->last_checkpoint = 0;
end_migration(dev, super, map_state);
else
map->map_state = map_state;
- map = get_imsm_map(dev, MAP_0);
map->failed_disk_num = ~0;
super->updates_pending++;
break;
break;
}
if (is_rebuilding(dev)) {
- dprintf_cont("while rebuilding.");
- if (map->map_state != map_state) {
- dprintf_cont(" Map state change");
- end_migration(dev, super, map_state);
+ dprintf_cont("while rebuilding ");
+ if (state & DS_FAULTY) {
+ dprintf_cont("removing failed drive ");
+ if (n == map->failed_disk_num) {
+ dprintf_cont("end migration");
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ } else {
+ dprintf_cont("fail detected during rebuild, changing map state");
+ map->map_state = map_state;
+ }
super->updates_pending++;
- } else if (!rebuild_done) {
- break;
}
+ if (!rebuild_done)
+ break;
+
/* check if recovery is really finished */
for (mdi = a->info.devs; mdi ; mdi = mdi->next)
if (mdi->recovery_start != MaxSector) {
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf("Rebuild has not finished yet, state not changed");
+ dprintf_cont("Rebuild has not finished yet");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;
}
dprintf_cont(" Rebuild done, still degraded");
- dev->vol.migr_state = 0;
- set_migr_type(dev, 0);
- dev->vol.curr_migr_unit = 0;
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
for (i = 0; i < map->num_members; i++) {
int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
/* Does this unused device have the requisite free space?
* It needs to be able to cover all member volumes
*/
- ex = get_extents(super, dl);
+ ex = get_extents(super, dl, 1);
if (!ex) {
dprintf("cannot get extents\n");
continue;
pos = 0;
array_start = pba_of_lba0(map);
array_end = array_start +
- blocks_per_member(map) - 1;
+ per_dev_array_size(map) - 1;
do {
/* check that we can start at pba_of_lba0 with
- * blocks_per_member of space
+ * num_data_stripes*blocks_per_stripe of space
*/
if (array_start >= pos && array_end < ex[j].start) {
found = 1;
remove_disk_super(super,
disk_cfg->major,
disk_cfg->minor);
+ } else {
+ disk_cfg->fd = disk->fd;
+ disk->fd = -1;
}
}
/* release allocate disk structure */
*/
if (u->new_chunksize > 0) {
unsigned long long num_data_stripes;
+ struct imsm_map *dest_map =
+ get_imsm_map(dev, MAP_0);
int used_disks =
- imsm_num_data_members(dev, MAP_0);
+ imsm_num_data_members(dest_map);
if (used_disks == 0)
return ret_val;
map->blocks_per_strip =
__cpu_to_le16(u->new_chunksize * 2);
num_data_stripes =
- (join_u32(dev->size_low, dev->size_high)
- / used_disks);
+ imsm_dev_size(dev) / used_disks;
num_data_stripes /= map->blocks_per_strip;
num_data_stripes /= map->num_domains;
set_num_data_stripes(map, num_data_stripes);
}
+ /* ensure blocks_per_member has valid value
+ */
+ set_blocks_per_member(map,
+ per_dev_array_size(map) +
+ NUM_BLOCKS_DIRTY_STRIPE_REGION);
+
/* add disk
*/
if (u->new_level != 5 || migr_map->raid_level != 0 ||
if (id->index == (unsigned)u->subdev) {
struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
struct imsm_map *map = get_imsm_map(dev, MAP_0);
- int used_disks = imsm_num_data_members(dev, MAP_0);
+ int used_disks = imsm_num_data_members(map);
unsigned long long blocks_per_member;
unsigned long long num_data_stripes;
+ unsigned long long new_size_per_disk;
+
+ if (used_disks == 0)
+ return 0;
/* calculate new size
*/
- blocks_per_member = u->new_size / used_disks;
- num_data_stripes = blocks_per_member /
+ new_size_per_disk = u->new_size / used_disks;
+ blocks_per_member = new_size_per_disk +
+ NUM_BLOCKS_DIRTY_STRIPE_REGION;
+ num_data_stripes = new_size_per_disk /
map->blocks_per_strip;
num_data_stripes /= map->num_domains;
dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n",
- u->new_size, blocks_per_member,
+ u->new_size, new_size_per_disk,
num_data_stripes);
set_blocks_per_member(map, blocks_per_member);
set_num_data_stripes(map, num_data_stripes);
return ret_val;
}
+/*
+ * Prepare the spare u->dl for activation in array u->array.
+ *
+ * Additional initialization (adding the bitmap header, filling the bitmap
+ * area with '1's to force an initial rebuild of the whole data area) is
+ * required when the spare is added to a volume with a write-intent bitmap,
+ * so for such a volume the write_bitmap hook is called on the spare.
+ *
+ * Returns 1 when activation may proceed and 0 when write_bitmap failed.
+ */
+static int prepare_spare_to_activate(struct supertype *st,
+				     struct imsm_update_activate_spare *u)
+{
+	struct intel_super *super = st->sb;
+	const int saved_vol = super->current_vol;
+	struct active_array *arr;
+	int ok = 1;
+
+	for (arr = st->arrays; arr; arr = arr->next) {
+		struct dl *spare;
+
+		if (arr->info.container_member != u->array ||
+		    arr->info.consistency_policy != CONSISTENCY_POLICY_BITMAP)
+			continue;
+
+		/* look u->dl up on the container's disk list */
+		spare = super->disks;
+		while (spare && spare != u->dl)
+			spare = spare->next;
+		if (!spare)
+			break;
+
+		/* select the target volume for write_bitmap, then restore */
+		super->current_vol = u->array;
+		if (st->ss->write_bitmap(st, spare->fd, NoUpdate))
+			ok = 0;
+		super->current_vol = saved_vol;
+	}
+
+	return ok;
+}
+
static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
struct intel_super *super,
struct active_array *active_array)
if (u->direction == R10_TO_R0) {
unsigned long long num_data_stripes;
- map->num_domains = 1;
- num_data_stripes = blocks_per_member(map);
- num_data_stripes /= map->blocks_per_strip;
- num_data_stripes /= map->num_domains;
- set_num_data_stripes(map, num_data_stripes);
-
/* Number of failed disks must be half of initial disk number */
if (imsm_count_failed(super, dev, MAP_0) !=
(map->num_members / 2))
map->num_domains = 1;
map->raid_level = 0;
map->failed_disk_num = -1;
+ num_data_stripes = imsm_dev_size(dev) / 2;
+ num_data_stripes /= map->blocks_per_strip;
+ set_num_data_stripes(map, num_data_stripes);
}
if (u->direction == R0_TO_R10) {
void **space;
+ unsigned long long num_data_stripes;
+
/* update slots in current disk list */
for (dm = super->disks; dm; dm = dm->next) {
if (dm->index >= 0)
map->map_state = IMSM_T_STATE_DEGRADED;
map->num_domains = 2;
map->raid_level = 1;
+ num_data_stripes = imsm_dev_size(dev) / 2;
+ num_data_stripes /= map->blocks_per_strip;
+ num_data_stripes /= map->num_domains;
+ set_num_data_stripes(map, num_data_stripes);
+
/* replace dev<->dev_new */
dv->dev = dev_new;
}
}
case update_activate_spare: {
struct imsm_update_activate_spare *u = (void *) update->buf;
- if (apply_update_activate_spare(u, super, st->arrays))
+
+ if (prepare_spare_to_activate(st, u) &&
+ apply_update_activate_spare(u, super, st->arrays))
super->updates_pending++;
break;
}
new_map = get_imsm_map(&u->dev, MAP_0);
new_start = pba_of_lba0(new_map);
- new_end = new_start + blocks_per_member(new_map);
+ new_end = new_start + per_dev_array_size(new_map);
inf = get_disk_info(u);
/* handle activate_spare versus create race:
dev = get_imsm_dev(super, i);
map = get_imsm_map(dev, MAP_0);
start = pba_of_lba0(map);
- end = start + blocks_per_member(map);
+ end = start + per_dev_array_size(map);
if ((new_start >= start && new_start <= end) ||
(start >= new_start && start <= new_end))
/* overlap */;
/* sanity check that we are not affecting the uuid of
* an active array
*/
+ memset(name, 0, sizeof(name));
snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
name[MAX_RAID_SERIAL_LEN] = '\0';
for (a = st->arrays; a; a = a->next)
break;
}
- snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
+ memcpy(dev->volume, name, MAX_RAID_SERIAL_LEN);
super->updates_pending++;
break;
}
break;
}
default:
- pr_err("error: unsuported process update type:(type: %d)\n", type);
+ pr_err("error: unsupported process update type:(type: %d)\n", type);
}
}
}
}
-static void close_targets(int *targets, int new_disks)
-{
- int i;
-
- if (!targets)
- return;
-
- for (i = 0; i < new_disks; i++) {
- if (targets[i] >= 0) {
- close(targets[i]);
- targets[i] = -1;
- }
- }
-}
-
static int imsm_get_allowed_degradation(int level, int raid_disks,
struct intel_super *super,
struct imsm_dev *dev)
}
}
-/*******************************************************************************
- * Function: open_backup_targets
- * Description: Function opens file descriptors for all devices given in
- * info->devs
- * Parameters:
- * info : general array info
- * raid_disks : number of disks
- * raid_fds : table of device's file descriptors
- * super : intel super for raid10 degradation check
- * dev : intel device for raid10 degradation check
- * Returns:
- * 0 : success
- * -1 : fail
- ******************************************************************************/
-int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
- struct intel_super *super, struct imsm_dev *dev)
-{
- struct mdinfo *sd;
- int i;
- int opened = 0;
-
- for (i = 0; i < raid_disks; i++)
- raid_fds[i] = -1;
-
- for (sd = info->devs ; sd ; sd = sd->next) {
- char *dn;
-
- if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
- dprintf("disk is faulty!!\n");
- continue;
- }
-
- if (sd->disk.raid_disk >= raid_disks || sd->disk.raid_disk < 0)
- continue;
-
- dn = map_dev(sd->disk.major,
- sd->disk.minor, 1);
- raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
- if (raid_fds[sd->disk.raid_disk] < 0) {
- pr_err("cannot open component\n");
- continue;
- }
- opened++;
- }
- /* check if maximum array degradation level is not exceeded
- */
- if ((raid_disks - opened) >
- imsm_get_allowed_degradation(info->new_level, raid_disks,
- super, dev)) {
- pr_err("Not enough disks can be opened.\n");
- close_targets(raid_fds, raid_disks);
- return -2;
- }
- return 0;
-}
-
/*******************************************************************************
* Function: validate_container_imsm
* Description: This routine validates container after assemble,
return NULL;
get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map),
- blocks_per_member(map), &super->bb);
+ per_dev_array_size(map), &super->bb);
return &super->bb;
}
int new_data_disks;
unsigned long long dsize, dev_sectors;
long long unsigned min_dev_sectors = -1LLU;
- struct mdinfo *sd;
- char nm[30];
- int fd;
struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
struct imsm_map *map_src = get_imsm_map(dev, MAP_1);
unsigned long long num_migr_units;
unsigned long long array_blocks;
+ struct dl *dl_disk = NULL;
memset(migr_rec, 0, sizeof(struct migr_record));
migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
migr_rec->dest_depth_per_unit *=
max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
- new_data_disks = imsm_num_data_members(dev, MAP_0);
+ new_data_disks = imsm_num_data_members(map_dest);
migr_rec->blocks_per_unit =
__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
migr_rec->dest_depth_per_unit =
if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
num_migr_units++;
- migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
+ set_num_migr_units(migr_rec, num_migr_units);
migr_rec->post_migr_vol_cap = dev->size_low;
migr_rec->post_migr_vol_cap_hi = dev->size_high;
/* Find the smallest dev */
- for (sd = info->devs ; sd ; sd = sd->next) {
- sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- fd = dev_open(nm, O_RDONLY);
- if (fd < 0)
+ for (dl_disk = super->disks; dl_disk ; dl_disk = dl_disk->next) {
+ /* ignore spares in container */
+ if (dl_disk->index < 0)
continue;
- get_dev_size(fd, NULL, &dsize);
+ get_dev_size(dl_disk->fd, NULL, &dsize);
dev_sectors = dsize / 512;
if (dev_sectors < min_dev_sectors)
min_dev_sectors = dev_sectors;
- close(fd);
}
- migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
+ set_migr_chkp_area_pba(migr_rec, min_dev_sectors -
RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
write_imsm_migr_rec(st);
int dest_layout = 0;
int dest_chunk;
unsigned long long start;
- int data_disks = imsm_num_data_members(dev, MAP_0);
+ int data_disks = imsm_num_data_members(map_dest);
targets = xmalloc(new_disks * sizeof(int));
- for (i = 0; i < new_disks; i++)
- targets[i] = -1;
+ for (i = 0; i < new_disks; i++) {
+ struct dl *dl_disk = get_imsm_dl_disk(super, i);
+
+ targets[i] = dl_disk->fd;
+ }
target_offsets = xcalloc(new_disks, sizeof(unsigned long long));
start = info->reshape_progress * 512;
for (i = 0; i < new_disks; i++) {
- target_offsets[i] = (unsigned long long)
- __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
+ target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512;
/* move back copy area adderss, it will be moved forward
* in restore_stripes() using start input variable
*/
target_offsets[i] -= start/data_disks;
}
- if (open_backup_targets(info, new_disks, targets,
- super, dev))
- goto abort;
-
dest_layout = imsm_level_to_layout(map_dest->raid_level);
dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
abort:
if (targets) {
- close_targets(targets, new_disks);
free(targets);
}
free(target_offsets);
unsigned long long blocks_per_unit;
unsigned long long curr_migr_unit;
- if (load_imsm_migr_rec(super, info) != 0) {
+ if (load_imsm_migr_rec(super) != 0) {
dprintf("imsm: ERROR: Cannot read migration record for checkpoint save.\n");
return 1;
}
if (info->reshape_progress % blocks_per_unit)
curr_migr_unit++;
- super->migr_rec->curr_migr_unit =
- __cpu_to_le32(curr_migr_unit);
+ set_current_migr_unit(super->migr_rec, curr_migr_unit);
super->migr_rec->rec_status = __cpu_to_le32(state);
- super->migr_rec->dest_1st_member_lba =
- __cpu_to_le32(curr_migr_unit *
- __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
+ set_migr_dest_1st_member_lba(super->migr_rec,
+ super->migr_rec->dest_depth_per_unit * curr_migr_unit);
+
if (write_imsm_migr_rec(st) < 0) {
dprintf("imsm: Cannot write migration record outside backup area\n");
return 1;
unsigned long long read_offset;
unsigned long long write_offset;
unsigned unit_len;
- int *targets = NULL;
- int new_disks, i, err;
+ int new_disks, err;
char *buf = NULL;
int retval = 1;
unsigned int sector_size = super->sector_size;
- unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
- unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
+ unsigned long curr_migr_unit = current_migr_unit(migr_rec);
+ unsigned long num_migr_units = get_num_migr_units(migr_rec);
char buffer[20];
int skipped_disks = 0;
+ struct dl *dl_disk;
err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
if (err < 1)
map_dest = get_imsm_map(id->dev, MAP_0);
new_disks = map_dest->num_members;
- read_offset = (unsigned long long)
- __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
+ read_offset = migr_chkp_area_pba(migr_rec) * 512;
- write_offset = ((unsigned long long)
- __le32_to_cpu(migr_rec->dest_1st_member_lba) +
+ write_offset = (migr_dest_1st_member_lba(migr_rec) +
pba_of_lba0(map_dest)) * 512;
unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
if (posix_memalign((void **)&buf, sector_size, unit_len) != 0)
goto abort;
- targets = xcalloc(new_disks, sizeof(int));
- if (open_backup_targets(info, new_disks, targets, super, id->dev)) {
- pr_err("Cannot open some devices belonging to array.\n");
- goto abort;
- }
+ for (dl_disk = super->disks; dl_disk; dl_disk = dl_disk->next) {
+ if (dl_disk->index < 0)
+ continue;
- for (i = 0; i < new_disks; i++) {
- if (targets[i] < 0) {
+ if (dl_disk->fd < 0) {
skipped_disks++;
continue;
}
- if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
+ if (lseek64(dl_disk->fd, read_offset, SEEK_SET) < 0) {
pr_err("Cannot seek to block: %s\n",
strerror(errno));
skipped_disks++;
continue;
}
- if ((unsigned)read(targets[i], buf, unit_len) != unit_len) {
+ if (read(dl_disk->fd, buf, unit_len) != unit_len) {
pr_err("Cannot read copy area block: %s\n",
strerror(errno));
skipped_disks++;
continue;
}
- if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
+ if (lseek64(dl_disk->fd, write_offset, SEEK_SET) < 0) {
pr_err("Cannot seek to block: %s\n",
strerror(errno));
skipped_disks++;
continue;
}
- if ((unsigned)write(targets[i], buf, unit_len) != unit_len) {
+ if (write(dl_disk->fd, buf, unit_len) != unit_len) {
pr_err("Cannot restore block: %s\n",
strerror(errno));
skipped_disks++;
retval = 0;
abort:
- if (targets) {
- for (i = 0; i < new_disks; i++)
- if (targets[i])
- close(targets[i]);
- free(targets);
- }
free(buf);
return retval;
}
int imsm_layout = -1;
int data_disks;
struct imsm_dev *dev;
+ struct imsm_map *map;
struct intel_super *super;
unsigned long long current_size;
unsigned long long free_size;
super = st->sb;
dev = get_imsm_dev(super, super->current_vol);
- data_disks = imsm_num_data_members(dev , MAP_0);
+ map = get_imsm_map(dev, MAP_0);
+ data_disks = imsm_num_data_members(map);
/* compute current size per disk member
*/
current_size = info.custom_array_size / data_disks;
if (geo->size > 0 && geo->size != MAX_SIZE) {
/* align component size
*/
- geo->size = imsm_component_size_aligment_check(
+ geo->size = imsm_component_size_alignment_check(
get_imsm_raid_level(dev->vol.map),
chunk * 1024, super->sector_size,
geo->size * 2);
max_size = free_size + current_size;
/* align component size
*/
- max_size = imsm_component_size_aligment_check(
+ max_size = imsm_component_size_alignment_check(
get_imsm_raid_level(dev->vol.map),
chunk * 1024, super->sector_size,
max_size);
return 0;
}
+/* Flush size update if size calculated by num_data_stripes is higher than
+ * imsm_dev_size to eliminate differences during reshape.
+ * Mdmon will recalculate them correctly.
+ * If subarray index is not set then check whole container.
+ *
+ * For every examined volume the size is computed from MAP_0 as
+ * per_dev_array_size(map) * imsm_num_data_members(map) and compared with
+ * imsm_dev_size(dev). Volumes where both values are equal, or whose
+ * migr_type is MIGR_GEN_MIGR, are skipped. For the remaining ones a size
+ * change update is created, applied locally and then either queued and
+ * flushed (when st->update_tail is set) or synced directly.
+ * super->current_vol is changed while iterating and restored before return.
+ *
+ * Parameters:
+ *	st		: supertype information, st->sb is the intel_super
+ *	subarray_index	: index of the volume to check, negative value means
+ *			  that all volumes in the container are checked
+ * Returns:
+ * 0 - no error occurred
+ * 1 - error detected
+ *
+ * NOTE(review): ownership of 'update' in the branch without update_tail is
+ * not visible from this function - confirm that the buffer is not leaked.
+ */
+static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
+{
+ struct intel_super *super = st->sb;
+ int tmp = super->current_vol; /* restored at the exit label */
+ int ret_val = 1; /* pessimistic default, cleared after the loop */
+ int i;
+
+ for (i = 0; i < super->anchor->num_raid_devs; i++) {
+ if (subarray_index >= 0 && i != subarray_index)
+ continue;
+ super->current_vol = i; /* helpers below operate on current_vol */
+ struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ unsigned int disc_count = imsm_num_data_members(map);
+ struct geo_params geo;
+ struct imsm_update_size_change *update;
+ unsigned long long calc_size = per_dev_array_size(map) * disc_count;
+ unsigned long long d_size = imsm_dev_size(dev);
+ int u_size;
+
+ if (calc_size == d_size || dev->vol.migr_type == MIGR_GEN_MIGR)
+ continue;
+
+ /* There is a difference, verify that imsm_dev_size is
+ * rounded correctly and push update.
+ * A volume size that does not survive round_size_to_mb()
+ * unchanged is treated as an error and stops the whole check.
+ */
+ if (d_size != round_size_to_mb(d_size, disc_count)) {
+ dprintf("imsm: Size of volume %d is not rounded correctly\n",
+ i);
+ goto exit;
+ }
+ memset(&geo, 0, sizeof(struct geo_params));
+ geo.size = d_size; /* only the size field is filled in */
+ u_size = imsm_create_metadata_update_for_size_change(st, &geo,
+ &update);
+ if (u_size < 1) {
+ dprintf("imsm: Cannot prepare size change update\n");
+ goto exit;
+ }
+ imsm_update_metadata_locally(st, update, u_size);
+ if (st->update_tail) {
+ append_metadata_update(st, update, u_size);
+ flush_metadata_updates(st);
+ st->update_tail = &st->updates; /* re-arm the queue after the flush */
+ } else {
+ imsm_sync_metadata(st);
+ }
+ }
+ ret_val = 0;
+exit:
+ super->current_vol = tmp;
+ return ret_val;
+}
+
static int imsm_reshape_super(struct supertype *st, unsigned long long size,
int level,
int layout, int chunksize, int raid_disks,
dprintf("for level : %i\n", geo.level);
dprintf("for raid_disks : %i\n", geo.raid_disks);
- if (experimental() == 0)
- return ret_val;
-
if (strcmp(st->container_devnm, st->devnm) == 0) {
/* On container level we can only increase number of devices. */
dprintf("imsm: info: Container operation\n");
struct imsm_update_reshape *u = NULL;
int len;
+ if (imsm_fix_size_mismatch(st, -1)) {
+ dprintf("imsm: Cannot fix size mismatch\n");
+ goto exit_imsm_reshape_super;
+ }
+
len = imsm_create_metadata_update_for_reshape(
st, &geo, old_raid_disks, &u);
struct intel_dev *dv;
unsigned int sector_size = super->sector_size;
struct imsm_dev *dev = NULL;
- struct imsm_map *map_src;
+ struct imsm_map *map_src, *map_dest;
int migr_vol_qan = 0;
int ndata, odata; /* [bytes] */
int chunk; /* [bytes] */
unsigned long long start_buf_shift; /* [bytes] */
int degraded = 0;
int source_layout = 0;
+ int subarray_index = -1;
if (!sra)
return ret_val;
dv->dev->vol.migr_state == 1) {
dev = dv->dev;
migr_vol_qan++;
+ subarray_index = dv->index;
}
}
/* Only one volume can migrate at the same time */
goto abort;
}
+ map_dest = get_imsm_map(dev, MAP_0);
map_src = get_imsm_map(dev, MAP_1);
if (map_src == NULL)
goto abort;
- ndata = imsm_num_data_members(dev, MAP_0);
- odata = imsm_num_data_members(dev, MAP_1);
+ ndata = imsm_num_data_members(map_dest);
+ odata = imsm_num_data_members(map_src);
chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
old_data_stripe_length = odata * chunk;
buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
/* extend buffer size for parity disk */
buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
- /* add space for stripe aligment */
+ /* add space for stripe alignment */
buf_size += old_data_stripe_length;
if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) {
dprintf("imsm: Cannot allocate checkpoint buffer\n");
max_position = sra->component_size * ndata;
source_layout = imsm_level_to_layout(map_src->raid_level);
- while (__le32_to_cpu(migr_rec->curr_migr_unit) <
- __le32_to_cpu(migr_rec->num_migr_units)) {
+ while (current_migr_unit(migr_rec) <
+ get_num_migr_units(migr_rec)) {
/* current reshape position [blocks] */
unsigned long long current_position =
__le32_to_cpu(migr_rec->blocks_per_unit)
- * __le32_to_cpu(migr_rec->curr_migr_unit);
+ * current_migr_unit(migr_rec);
unsigned long long border;
/* Check that array hasn't become failed.
/* return '1' if done */
ret_val = 1;
+
+ /* After the reshape, eliminate the size mismatch in metadata.
+ * Don't update md/component_size here: the volume does not have
+ * to take the whole space, which is allowed by the kernel.
+ * md/component_size will be set properly after the next assembly.
+ */
+ imsm_fix_size_mismatch(st, subarray_index);
+
abort:
free(buf);
/* See Grow.c: abort_reshape() for further explanation */
return ret_val;
}
+/*******************************************************************************
+ * Function:	calculate_bitmap_min_chunksize
+ * Description:	Calculates the minimal valid bitmap chunk size.
+ *		Starting from a 4096 chunk, the chunk is doubled (and the
+ *		number of required bits halved, rounding up) until the bit
+ *		count fits into max_bits.
+ * Parameters:
+ *	max_bits	: indicate how many bits can be used for the bitmap;
+ *			  a value of 0 is treated as 1, because the bit count
+ *			  can never drop below one
+ *	data_area_size	: the size of the data area covered by the bitmap
+ *
+ * Returns:
+ *	The bitmap chunk size
+ ******************************************************************************/
+static unsigned long long
+calculate_bitmap_min_chunksize(unsigned long long max_bits,
+			       unsigned long long data_area_size)
+{
+	/* sub-page chunks don't work yet.. */
+	unsigned long long min_chunk = 4096;
+	unsigned long long bits = data_area_size / min_chunk + 1;
+
+	/* (bits + 1) / 2 settles at 1, so a zero limit would never be met
+	 * and the loop below would spin forever.
+	 */
+	if (max_bits == 0)
+		max_bits = 1;
+
+	while (bits > max_bits) {
+		min_chunk *= 2;
+		bits = (bits + 1) / 2;
+	}
+	return min_chunk;
+}
+
+
+/*******************************************************************************
+ * Function:	calculate_bitmap_chunksize
+ * Description:	Calculates the bitmap chunk size for the given device.
+ *		The default chunk size is used unless the minimal chunk size
+ *		required to cover the device is bigger.
+ * Parameters:
+ *	st	: supertype information
+ *	dev	: device for the bitmap
+ *
+ * Returns:
+ *	The bitmap chunk size
+ ******************************************************************************/
+static unsigned long long calculate_bitmap_chunksize(struct supertype *st,
+						     struct imsm_dev *dev)
+{
+	struct intel_super *super = st->sb;
+	unsigned long long min_chunksize;
+	unsigned long long result = IMSM_DEFAULT_BITMAP_CHUNKSIZE;
+	/* keep the full 64-bit device size, size_t is too narrow on 32-bit */
+	unsigned long long dev_size = imsm_dev_size(dev);
+	/* widen before multiplying so the product cannot wrap */
+	unsigned long long max_bits =
+		(unsigned long long)IMSM_BITMAP_AREA_SIZE * super->sector_size;
+
+	min_chunksize = calculate_bitmap_min_chunksize(max_bits, dev_size);
+
+	if (result < min_chunksize)
+		result = min_chunksize;
+
+	return result;
+}
+
+
+/*******************************************************************************
+ * Function:	init_bitmap_header
+ * Description:	Initialize the bitmap header structure: magic, version,
+ *		daemon sleep, sync size, write behind, volume uuid and chunk
+ *		size. All numeric fields are stored in little-endian order.
+ * Parameters:
+ *	st	: supertype information
+ *	bms	: bitmap header struct to initialize
+ *	dev	: device for the bitmap
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail (bms or dev is NULL)
+ ******************************************************************************/
+static int init_bitmap_header(struct supertype *st, struct bitmap_super_s *bms,
+			      struct imsm_dev *dev)
+{
+	int vol_uuid[4];
+
+	if (!bms || !dev)
+		return -1;
+
+	bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+	bms->version = __cpu_to_le32(BITMAP_MAJOR_HI);
+	bms->daemon_sleep = __cpu_to_le32(IMSM_DEFAULT_BITMAP_DAEMON_SLEEP);
+	bms->sync_size = __cpu_to_le64(IMSM_BITMAP_AREA_SIZE);
+	bms->write_behind = __cpu_to_le32(0);
+
+	uuid_from_super_imsm(st, vol_uuid);
+	memcpy(bms->uuid, vol_uuid, 16);
+
+	/* convert like every other field, otherwise the header written on
+	 * a big-endian host carries a byte-swapped chunk size
+	 */
+	bms->chunksize = __cpu_to_le32(calculate_bitmap_chunksize(st, dev));
+
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	validate_internal_bitmap_for_drive
+ * Description:	Verify the bitmap header stored on a given drive: the magic,
+ *		the version and the uuid must match the current volume.
+ * Parameters:
+ *	st	: supertype information
+ *	offset	: The offset from the beginning of the drive where to look for
+ *		  the bitmap header (multiplied by the sector size for seek).
+ *	d	: the drive info
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail
+ ******************************************************************************/
+static int validate_internal_bitmap_for_drive(struct supertype *st,
+					      unsigned long long offset,
+					      struct dl *d)
+{
+	struct intel_super *super = st->sb;
+	bitmap_super_t *header;
+	void *read_buf;
+	int vol_uuid[4];
+	int result = -1;
+	int fd;
+
+	if (!d)
+		return -1;
+
+	if (posix_memalign(&read_buf, MAX_SECTOR_SIZE, IMSM_BITMAP_HEADER_SIZE))
+		return -1;
+
+	/* use the descriptor kept for the drive, open a private one if the
+	 * drive has none
+	 */
+	fd = d->fd;
+	if (fd < 0) {
+		fd = open(d->devname, O_RDONLY, 0);
+		if (fd < 0) {
+			dprintf("cannot open the device %s\n", d->devname);
+			goto abort;
+		}
+	}
+
+	if (lseek64(fd, offset * super->sector_size, SEEK_SET) < 0)
+		goto abort;
+	if (read(fd, read_buf, IMSM_BITMAP_HEADER_SIZE) !=
+	    IMSM_BITMAP_HEADER_SIZE)
+		goto abort;
+
+	uuid_from_super_imsm(st, vol_uuid);
+	header = read_buf;
+
+	if (header->magic == __cpu_to_le32(BITMAP_MAGIC) &&
+	    header->version == __cpu_to_le32(BITMAP_MAJOR_HI) &&
+	    same_uuid((int *)header->uuid, vol_uuid, st->ss->swapuuid))
+		result = 0;
+	else
+		dprintf("wrong bitmap header detected\n");
+
+abort:
+	/* close only the descriptor opened here, never the drive's own */
+	if (d->fd < 0 && fd >= 0)
+		close(fd);
+	free(read_buf);
+
+	return result;
+}
+
+
+/*******************************************************************************
+ * Function:	validate_internal_bitmap_imsm
+ * Description:	Verify that the bitmap header is in place and holds proper
+ *		data on every drive of the current volume that has a
+ *		non-negative index and is not failed.
+ * Parameters:
+ *	st	: supertype information
+ *
+ * Returns:
+ *	 0 : success or device w/o RWH_BITMAP
+ *	-1 : fail
+ ******************************************************************************/
+static int validate_internal_bitmap_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+	unsigned long long header_sector;
+	struct dl *disk;
+
+	if (!dev)
+		return -1;
+
+	/* nothing to validate when the volume does not use the bitmap */
+	if (dev->rwh_policy != RWH_BITMAP)
+		return 0;
+
+	header_sector = get_bitmap_header_sector(super, super->current_vol);
+	for (disk = super->disks; disk; disk = disk->next) {
+		int usable = disk->index >= 0 && !is_failed(&disk->disk);
+
+		if (usable &&
+		    validate_internal_bitmap_for_drive(st, header_sector,
+						       disk)) {
+			pr_err("imsm: bitmap validation failed\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	add_internal_bitmap_imsm
+ * Description:	Mark the current volume to use the bitmap and report the
+ *		chunk size calculated for it through chunkp.
+ * Parameters:
+ *	st		: supertype information
+ *	chunkp		: output, bitmap chunk size
+ *	delay		: not used for imsm
+ *	write_behind	: not used for imsm
+ *	size		: not used for imsm
+ *	may_change	: not used for imsm
+ *	amajor		: not used for imsm
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail
+ ******************************************************************************/
+static int add_internal_bitmap_imsm(struct supertype *st, int *chunkp,
+				    int delay, int write_behind,
+				    unsigned long long size, int may_change,
+				    int amajor)
+{
+	struct intel_super *super = st->sb;
+	int vol_idx = super->current_vol;
+	struct imsm_dev *dev;
+
+	if (!chunkp || vol_idx == -1 || !super->devlist)
+		return -1;
+
+	dev = get_imsm_dev(super, vol_idx);
+	if (!dev) {
+		dprintf("cannot find the device for volume index %d\n",
+			vol_idx);
+		return -1;
+	}
+
+	/* switch the volume policy first, then report the chunk size */
+	dev->rwh_policy = RWH_BITMAP;
+	*chunkp = calculate_bitmap_chunksize(st, dev);
+
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	locate_bitmap_imsm
+ * Description:	Seek 'fd' to start of write-intent-bitmap header of the
+ *		current volume. The header sector is multiplied by 512 to get
+ *		the byte position.
+ * Parameters:
+ *	st		: supertype information
+ *	fd		: file descriptor for the device
+ *	node_num	: not used for imsm
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail (no volume selected or the seek failed)
+ ******************************************************************************/
+static int locate_bitmap_imsm(struct supertype *st, int fd, int node_num)
+{
+	struct intel_super *super = st->sb;
+	unsigned long long offset;
+	int vol_idx = super->current_vol;
+
+	if (!super->devlist || vol_idx == -1)
+		return -1;
+
+	offset = get_bitmap_header_sector(super, super->current_vol);
+	dprintf("bitmap header offset is %llu\n", offset);
+
+	/* a failed seek must not be reported as success, the caller would
+	 * read or write at whatever position the descriptor was left at
+	 */
+	if (lseek64(fd, offset << 9, SEEK_SET) < 0) {
+		dprintf("cannot seek to the bitmap header\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	write_init_bitmap_imsm
+ * Description:	Write a bitmap header and prepares the area for the bitmap.
+ *		The header space is zeroed first, then the bitmap area is
+ *		filled with 0xFF and finally the initialized header is written.
+ * Parameters:
+ *	st	: supertype information
+ *	fd	: file descriptor for the device
+ *	update	: not used for imsm
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail
+ ******************************************************************************/
+static int write_init_bitmap_imsm(struct supertype *st, int fd,
+				  enum bitmap_update update)
+{
+	struct intel_super *super = st->sb;
+	int vol_idx = super->current_vol;
+	int ret = 0;
+	unsigned long long offset;
+	unsigned long long bitmap_area_start;
+	unsigned int header_sectors;
+	bitmap_super_t bms = { 0 };
+	size_t written = 0;
+	size_t to_write;
+	ssize_t rv_num;
+	struct imsm_dev *dev;
+	void *buf;
+
+	if (!super->devlist || !super->sector_size || vol_idx == -1)
+		return -1;
+
+	dev = get_imsm_dev(super, vol_idx);
+	if (!dev) {
+		dprintf("cannot find the device for volume index %d\n",
+			vol_idx);
+		return -1;
+	}
+
+	/* first clear the space for bitmap header */
+	bitmap_area_start = get_bitmap_header_sector(super, vol_idx);
+	header_sectors = IMSM_BITMAP_HEADER_SIZE / super->sector_size;
+
+	/* report the size that is really zeroed below */
+	dprintf("zeroing area start (%llu) and size (%u)\n", bitmap_area_start,
+		header_sectors);
+	if (zero_disk_range(fd, bitmap_area_start, header_sectors)) {
+		pr_err("imsm: cannot zeroing the space for the bitmap\n");
+		return -1;
+	}
+
+	/* The bitmap area should be filled with "1"s to perform initial
+	 * synchronization.
+	 */
+	if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE))
+		return -1;
+	memset(buf, 0xFF, MAX_SECTOR_SIZE);
+	offset = get_bitmap_sector(super, vol_idx);
+	if (lseek64(fd, offset << 9, SEEK_SET) < 0) {
+		ret = -1;
+		dprintf("cannot seek to the bitmap area\n");
+		goto abort;
+	}
+	while (written < IMSM_BITMAP_AREA_SIZE) {
+		/* never write past the end of the bitmap area */
+		to_write = IMSM_BITMAP_AREA_SIZE - written;
+		if (to_write > MAX_SECTOR_SIZE)
+			to_write = MAX_SECTOR_SIZE;
+		rv_num = write(fd, buf, to_write);
+		if (rv_num < 0 || (size_t)rv_num != to_write) {
+			ret = -1;
+			dprintf("cannot initialize bitmap area\n");
+			goto abort;
+		}
+		written += rv_num;
+	}
+
+	/* write a bitmap header */
+	if (init_bitmap_header(st, &bms, dev)) {
+		ret = -1;
+		dprintf("cannot initialize the bitmap header\n");
+		goto abort;
+	}
+	memset(buf, 0, MAX_SECTOR_SIZE);
+	memcpy(buf, &bms, sizeof(bitmap_super_t));
+	if (locate_bitmap_imsm(st, fd, 0)) {
+		ret = -1;
+		dprintf("cannot locate the bitmap\n");
+		goto abort;
+	}
+	if (write(fd, buf, MAX_SECTOR_SIZE) != MAX_SECTOR_SIZE) {
+		ret = -1;
+		dprintf("cannot write the bitmap header\n");
+		goto abort;
+	}
+	fsync(fd);
+
+abort:
+	free(buf);
+
+	return ret;
+}
+
+
+/*******************************************************************************
+ * Function:	is_vol_to_setup_bitmap
+ * Description:	Checks if a bitmap should be activated on the dev.
+ *		The result is meant to be used as a truth value: non-zero
+ *		means that the volume has to be processed.
+ * Parameters:
+ *	info	: info about the volume to setup the bitmap
+ *	dev	: the device to check against bitmap creation
+ *
+ * Returns:
+ *	-1 : the volume name matches info->name and the volume uses
+ *	     RWH_BITMAP, so the bitmap should be set up; also returned
+ *	     when info or dev is NULL
+ *	 0 : otherwise
+ ******************************************************************************/
+static int is_vol_to_setup_bitmap(struct mdinfo *info, struct imsm_dev *dev)
+{
+	if (!dev || !info)
+		return -1;
+
+	/* dev->volume is a fixed-width field that is not NUL-terminated when
+	 * the name uses all MAX_RAID_SERIAL_LEN bytes, so bound the compare
+	 */
+	if ((strncmp((char *)dev->volume, info->name,
+		     MAX_RAID_SERIAL_LEN) == 0) &&
+	    (dev->rwh_policy == RWH_BITMAP))
+		return -1;
+
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	set_bitmap_sysfs
+ * Description:	Set the sysfs attributes of a given volume to activate the
+ *		bitmap. The attributes are written in the order: metadata,
+ *		chunksize, space, time_base, location.
+ * Parameters:
+ *	info		: info about the volume where the bitmap should be setup
+ *	chunksize	: bitmap chunk size
+ *	location	: location of the bitmap
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail (the first attribute that cannot be set stops the setup)
+ ******************************************************************************/
+static int set_bitmap_sysfs(struct mdinfo *info, unsigned long long chunksize,
+			    char *location)
+{
+	/* The bitmap/metadata is set to external to allow changing of value for
+	 * bitmap/location. When external is used, the kernel will treat an offset
+	 * related to the device's first lba (in opposition to the "internal" case
+	 * when this value is related to the beginning of the superblock).
+	 */
+	if (sysfs_set_str(info, NULL, "bitmap/metadata", "external")) {
+		dprintf("failed to set bitmap/metadata\n");
+		return -1;
+	}
+
+	/* It can only be changed when no bitmap is active.
+	 * Should be bigger than 512 and must be power of 2.
+	 * It is expecting the value in bytes.
+	 *
+	 * The numbers below are passed in host byte order: they are not
+	 * on-disk fields, so no endian conversion must be applied to them.
+	 */
+	if (sysfs_set_num(info, NULL, "bitmap/chunksize", chunksize)) {
+		dprintf("failed to set bitmap/chunksize\n");
+		return -1;
+	}
+
+	/* It is expecting the value in sectors. */
+	if (sysfs_set_num(info, NULL, "bitmap/space",
+			  IMSM_BITMAP_AREA_SIZE)) {
+		dprintf("failed to set bitmap/space\n");
+		return -1;
+	}
+
+	/* Determines the delay between the bitmap updates.
+	 * It is expecting the value in seconds.
+	 */
+	if (sysfs_set_num(info, NULL, "bitmap/time_base",
+			  IMSM_DEFAULT_BITMAP_DAEMON_SLEEP)) {
+		dprintf("failed to set bitmap/time_base\n");
+		return -1;
+	}
+
+	/* It is expecting the value in sectors with a sign at the beginning. */
+	if (sysfs_set_str(info, NULL, "bitmap/location", location)) {
+		dprintf("failed to set bitmap/location\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*******************************************************************************
+ * Function:	set_bitmap_imsm
+ * Description:	Setup the bitmap for the given volume. Every volume of the
+ *		container is visited; for the ones selected by
+ *		is_vol_to_setup_bitmap() the on-disk header is validated and
+ *		the sysfs attributes are written.
+ *		super->current_vol is restored before returning.
+ * Parameters:
+ *	st	: supertype information
+ *	info	: info about the volume where the bitmap should be setup
+ *
+ * Returns:
+ *	 0 : success
+ *	-1 : fail
+ ******************************************************************************/
+static int set_bitmap_imsm(struct supertype *st, struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	int saved_vol = super->current_vol;
+	struct intel_dev *entry;
+	struct imsm_dev *dev;
+	unsigned long long chunksize;
+	char location[16] = "";
+	int ret = -1;
+
+	for (entry = super->devlist; entry; entry = entry->next) {
+		/* the helpers below work on the currently selected volume */
+		super->current_vol = entry->index;
+		dev = get_imsm_dev(super, super->current_vol);
+
+		if (!is_vol_to_setup_bitmap(info, dev))
+			continue;
+
+		if (validate_internal_bitmap_imsm(st)) {
+			dprintf("bitmap header validation failed\n");
+			goto abort;
+		}
+
+		chunksize = calculate_bitmap_chunksize(st, dev);
+		dprintf("chunk size is %llu\n", chunksize);
+
+		snprintf(location, sizeof(location), "+%llu",
+			 get_bitmap_sector(super, super->current_vol));
+		dprintf("bitmap offset is %s\n", location);
+
+		if (set_bitmap_sysfs(info, chunksize, location)) {
+			dprintf("cannot setup the bitmap\n");
+			goto abort;
+		}
+	}
+	ret = 0;
+abort:
+	super->current_vol = saved_vol;
+	return ret;
+}
+
+
struct superswitch super_imsm = {
.examine_super = examine_super_imsm,
.brief_examine_super = brief_examine_super_imsm,
.reshape_super = imsm_reshape_super,
.manage_reshape = imsm_manage_reshape,
.recover_backup = recover_backup_imsm,
- .copy_metadata = copy_metadata_imsm,
.examine_badblocks = examine_badblocks_imsm,
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
.container_content = container_content_imsm,
.validate_container = validate_container_imsm,
+ .add_internal_bitmap = add_internal_bitmap_imsm,
+ .locate_bitmap = locate_bitmap_imsm,
+ .write_bitmap = write_init_bitmap_imsm,
+ .set_bitmap = set_bitmap_imsm,
+
.write_init_ppl = write_init_ppl_imsm,
.validate_ppl = validate_ppl_imsm,