}
#ifndef MDASSEMBLE
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
+
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
__u64 sz;
printf(" Chunk Size : %u KiB\n",
__le16_to_cpu(map->blocks_per_strip) / 2);
printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
- printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle\n");
+ printf(" Migrate State : ");
if (dev->vol.migr_state) {
if (migr_type(dev) == MIGR_INIT)
- printf(": initializing\n");
+ printf("initialize\n");
else if (migr_type(dev) == MIGR_REBUILD)
- printf(": rebuilding\n");
+ printf("rebuild\n");
else if (migr_type(dev) == MIGR_VERIFY)
- printf(": check\n");
+ printf("check\n");
else if (migr_type(dev) == MIGR_GEN_MIGR)
- printf(": general migration\n");
+ printf("general migration\n");
else if (migr_type(dev) == MIGR_STATE_CHANGE)
- printf(": state change\n");
+ printf("state change\n");
else if (migr_type(dev) == MIGR_REPAIR)
- printf(": repair\n");
+ printf("repair\n");
else
- printf(": <unknown:%d>\n", migr_type(dev));
- }
+ printf("<unknown:%d>\n", migr_type(dev));
+ } else
+ printf("idle\n");
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
+
printf(" <-- %s", map_state_str[map->map_state]);
+ printf("\n Checkpoint : %u (%llu)",
+ __le32_to_cpu(dev->vol.curr_migr_unit),
+ blocks_per_migr_unit(dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
#endif
+static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
+{
+ /* migr_strip_size when repairing or initializing parity */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 5:
+ case 10:
+ return chunk;
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
+{
+	/* migr_strip_size when rebuilding a degraded disk; no idea why
+	 * this is different from migr_strip_blocks_resync(), but it's good
+	 * to be compatible
+	 */
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ if (map->num_members % map->num_domains == 0)
+ return 128*1024 >> 9;
+ else
+ return chunk;
+ case 5:
+ return max((__u32) 64*1024 >> 9, chunk);
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
+{
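+	/* strips per migration unit in the resync case: the ratio of the
+	 * previous map's strip size to the current map's strip size, but
+	 * never less than one
+	 */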
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+ __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
+ __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
+
+ return max((__u32) 1, hi_chunk / lo_chunk);
+}
+
+static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
+{
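+	/* strips per migration unit in the rebuild case: one strip per
+	 * mirror domain for raid1/10, otherwise the same as for resync
+	 */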
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ int level = get_imsm_raid_level(lo);
+
+ if (level == 1 || level == 10) {
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+
+ return hi->num_domains;
+ } else
+ return num_stripes_per_unit_resync(dev);
+}
+
+static __u8 imsm_num_data_members(struct imsm_dev *dev)
+{
+ /* named 'imsm_' because raid0, raid1 and raid10
+ * counter-intuitively have the same number of data disks
+ */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+
+ switch (get_imsm_raid_level(map)) {
+ case 0:
+ case 1:
+ case 10:
+ return map->num_members;
+ case 5:
+ return map->num_members - 1;
+ default:
+ dprintf("%s: unsupported raid level\n", __func__);
+ return 0;
+ }
+}
+
+static __u32 parity_segment_depth(struct imsm_dev *dev)
+{
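+	/* per-member blocks spanned by one parity segment: the strip size
+	 * scaled by the number of domains (raid1/10) or members (raid5)
+	 */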
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch(get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ return chunk * map->num_domains;
+ case 5:
+ return chunk * map->num_members;
+ default:
+ return chunk;
+ }
+}
+
+static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
+{
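+	/* translate a volume-relative data block offset into the per-member
+	 * block offset where it lives, based on the geometry of map 1
+	 */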
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+ __u32 strip = block / chunk;
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10: {
+ __u32 vol_strip = (strip * map->num_domains) + 1;
+ __u32 vol_stripe = vol_strip / map->num_members;
+
+ return vol_stripe * chunk + block % chunk;
+ } case 5: {
+ __u32 stripe = strip / (map->num_members - 1);
+
+ return stripe * chunk + block % chunk;
+ }
+ default:
+ return 0;
+ }
+}
+
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
+{
+ /* calculate the conversion factor between per member 'blocks'
+ * (md/{resync,rebuild}_start) and imsm migration units, return
+ * 0 for the 'not migrating' and 'unsupported migration' cases
+ */
+ if (!dev->vol.migr_state)
+ return 0;
+
+ switch (migr_type(dev)) {
+ case MIGR_VERIFY:
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 stripes_per_unit;
+ __u32 blocks_per_unit;
+ __u32 parity_depth;
+ __u32 migr_chunk;
+ __u32 block_map;
+ __u32 block_rel;
+ __u32 segment;
+ __u32 stripe;
+ __u8 disks;
+
+ /* yes, this is really the translation of migr_units to
+ * per-member blocks in the 'resync' case
+ */
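+		/* for illustration, assuming map 0 and map 1 both carry a
+		 * 64 KiB strip: a three-disk raid5 has disks == 2,
+		 * migr_chunk == 128 blocks and stripes_per_unit == 1, so
+		 * one migration unit advances 384 per-member blocks
+		 * (one parity_segment_depth)
+		 */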
+ stripes_per_unit = num_stripes_per_unit_resync(dev);
+ migr_chunk = migr_strip_blocks_resync(dev);
+ disks = imsm_num_data_members(dev);
+ blocks_per_unit = stripes_per_unit * migr_chunk * disks;
+ stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
+ segment = blocks_per_unit / stripe;
+ block_rel = blocks_per_unit - segment * stripe;
+ parity_depth = parity_segment_depth(dev);
+ block_map = map_migr_block(dev, block_rel);
+ return block_map + parity_depth * segment;
+ }
+ case MIGR_REBUILD: {
+ __u32 stripes_per_unit;
+ __u32 migr_chunk;
+
+ stripes_per_unit = num_stripes_per_unit_rebuild(dev);
+ migr_chunk = migr_strip_blocks_rebuild(dev);
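+		/* e.g. a two-disk raid1 rebuild (num_members == num_domains
+		 * == 2): migr_chunk is 256 blocks and stripes_per_unit is 2,
+		 * i.e. 512 blocks (256 KiB) per migration unit
+		 */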
+ return migr_chunk * stripes_per_unit;
+ }
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ default:
+ return 0;
+ }
+}
+
static int imsm_level_to_layout(int level)
{
switch (level) {
info->component_size = __le32_to_cpu(map->blocks_per_member);
memset(info->uuid, 0, sizeof(info->uuid));
- if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
- info->resync_start = 0;
- else if (dev->vol.migr_state)
- /* FIXME add curr_migr_unit to resync_start conversion */
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) {
info->resync_start = 0;
- else
- info->resync_start = ~0ULL;
+ } else if (dev->vol.migr_state) {
+ switch (migr_type(dev)) {
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ __u64 blocks_per_unit = blocks_per_migr_unit(dev);
+ __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+
+ info->resync_start = blocks_per_unit * units;
+ break;
+ }
+ case MIGR_VERIFY:
+ /* we could emulate the checkpointing of
+ * 'sync_action=check' migrations, but for now
+ * we just immediately complete them
+ */
+ case MIGR_REBUILD:
+ /* this is handled by container_content_imsm() */
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ /* FIXME handle other migrations */
+ default:
+ /* we are not dirty, so... */
+ info->resync_start = MaxSector;
+ }
+ } else
+ info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
info->name[MAX_RAID_SERIAL_LEN] = 0;
first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
first->anchor->orig_family_num = sec->anchor->orig_family_num;
first->anchor->family_num = sec->anchor->family_num;
+ memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
for (i = 0; i < sec->anchor->num_raid_devs; i++)
imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
}
struct intel_disk *idisk;
idisk = disk_list_get(dl->serial, disk_list);
- if (is_spare(&idisk->disk) &&
+ if (idisk && is_spare(&idisk->disk) &&
!is_failed(&idisk->disk) && !is_configured(&idisk->disk))
dl->index = -1;
else {
unsigned long reserve;
if (!e)
- return ~0ULL; /* error */
+ return 0;
/* coalesce and sort all extents. also, check to see if we need to
* reserve space between member arrays
} while (e[i-1].size);
free(e);
+ if (maxsize == 0)
+ return 0;
+
+ /* FIXME assumes volume at offset 0 is the first volume in a
+ * container
+ */
if (start_extent > 0)
reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
else
reserve = 0;
if (maxsize < reserve)
- return ~0ULL;
+ return 0;
super->create_offset = ~((__u32) 0);
if (start + reserve > super->create_offset)
- return ~0ULL; /* start overflows create_offset */
+ return 0; /* start overflows create_offset */
super->create_offset = start + reserve;
return maxsize - reserve;
}
#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
+static int
+validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
+ int raiddisks, int chunk, int verbose)
+{
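+	/* check the requested raid level, disk count, chunk size and layout
+	 * against the platform (option-rom) constraints
+	 */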
+ if (!is_raid_level_supported(super->orom, level, raiddisks)) {
+ pr_vrb(": platform does not support raid%d with %d disk%s\n",
+ level, raiddisks, raiddisks > 1 ? "s" : "");
+ return 0;
+ }
+ if (super->orom && level != 1 &&
+ !imsm_orom_has_chunk(super->orom, chunk)) {
+ pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
+ return 0;
+ }
+ if (layout != imsm_level_to_layout(level)) {
+ if (level == 5)
+ pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
+ else if (level == 10)
+ pr_vrb(": imsm raid 10 only supports the n2 layout\n");
+ else
+ pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
+ layout, level);
+ return 0;
+ }
+
+ return 1;
+}
+
/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
* FIX ME add ahci details
*/
if (!super)
return 0;
- if (!is_raid_level_supported(super->orom, level, raiddisks)) {
- pr_vrb(": platform does not support raid%d with %d disk%s\n",
- level, raiddisks, raiddisks > 1 ? "s" : "");
- return 0;
- }
- if (super->orom && level != 1 &&
- !imsm_orom_has_chunk(super->orom, chunk)) {
- pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
+	if (!validate_geometry_imsm_orom(super, level, layout,
+					 raiddisks, chunk, verbose))
return 0;
- }
- if (layout != imsm_level_to_layout(level)) {
- if (level == 5)
- pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
- else if (level == 10)
- pr_vrb(": imsm raid 10 only supports the n2 layout\n");
- else
- pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
- layout, level);
- return 0;
- }
if (!dev) {
/* General test: make sure there is space for
* offset
*/
unsigned long long minsize = size;
- unsigned long long start_offset = ~0ULL;
+ unsigned long long start_offset = MaxSector;
int dcnt = 0;
if (minsize == 0)
minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
esize = e[i].start - pos;
if (esize >= minsize)
found = 1;
- if (found && start_offset == ~0ULL) {
+ if (found && start_offset == MaxSector) {
start_offset = pos;
break;
} else if (found && pos != start_offset) {
i += dl->extent_cnt;
maxsize = merge_extents(super, i);
- if (maxsize < size) {
+ if (maxsize < size || maxsize == 0) {
if (verbose)
fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
maxsize, size);
return 0;
- } else if (maxsize == ~0ULL) {
- if (verbose)
- fprintf(stderr, Name ": failed to merge %d extents\n", i);
- return 0;
}
*freesize = maxsize;
if (cnt < raiddisks ||
(super->orom && used && used != raiddisks) ||
- maxsize < minsize) {
+ maxsize < minsize ||
+ maxsize == 0) {
fprintf(stderr, Name ": not enough devices with space to create array.\n");
return 0; /* No enough free spaces large enough */
}
* created. add_to_super and getinfo_super
* detect when autolayout is in progress.
*/
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk,
+ verbose))
+ return 0;
return reserve_space(st, raiddisks, size, chunk, freesize);
}
return 1;
case 1:
case 10:
case 5:
- break;
+ return 0;
default:
+ if (verbose)
+ fprintf(stderr, Name
+ ": IMSM only supports levels 0,1,5,10\n");
return 1;
}
}
#endif /* MDASSEMBLE */
+static int is_rebuilding(struct imsm_dev *dev)
+{
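+	/* a volume is rebuilding when it is in a rebuild migration and the
+	 * map being migrated from is marked degraded
+	 */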
+ struct imsm_map *migr_map;
+
+ if (!dev->vol.migr_state)
+ return 0;
+
+ if (migr_type(dev) != MIGR_REBUILD)
+ return 0;
+
+ migr_map = get_imsm_map(dev, 1);
+
+ if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
+ return 1;
+ else
+ return 0;
+}
+
+static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
+{
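+	/* set recovery_start for the member being rebuilt based on the
+	 * migration checkpoint recorded in the metadata
+	 */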
+ struct mdinfo *rebuild = NULL;
+ struct mdinfo *d;
+ __u32 units;
+
+ if (!is_rebuilding(dev))
+ return;
+
+ /* Find the rebuild target, but punt on the dual rebuild case */
+ for (d = array->devs; d; d = d->next)
+ if (d->recovery_start == 0) {
+ if (rebuild)
+ return;
+ rebuild = d;
+ }
+
+	/* punt if no rebuild target was found despite the rebuild state */
+	if (!rebuild)
+		return;
+
+	units = __le32_to_cpu(dev->vol.curr_migr_unit);
+	rebuild->recovery_start = units * blocks_per_migr_unit(dev);
+}
+
static struct mdinfo *container_content_imsm(struct supertype *st)
{
/* Given a container loaded by load_super_imsm_all,
super->current_vol = i;
getinfo_super_imsm_volume(st, this);
for (slot = 0 ; slot < map->num_members; slot++) {
+ unsigned long long recovery_start;
struct mdinfo *info_d;
struct dl *d;
int idx;
if (d->index == idx)
break;
+ recovery_start = MaxSector;
if (d == NULL)
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
- skip = 1;
+ recovery_start = 0;
/*
* if we skip some disks the array will be assmebled degraded;
- * reset resync start to avoid a dirty-degraded situation
+			 * reset resync start to avoid a dirty-degraded
+			 * situation when performing the initial sync
*
* FIXME handle dirty degraded
*/
- if (skip && !dev->vol.dirty)
- this->resync_start = ~0ULL;
+ if ((skip || recovery_start == 0) && !dev->vol.dirty)
+ this->resync_start = MaxSector;
if (skip)
continue;
- info_d = malloc(sizeof(*info_d));
+ info_d = calloc(1, sizeof(*info_d));
if (!info_d) {
fprintf(stderr, Name ": failed to allocate disk"
" for volume %.16s\n", dev->volume);
+ info_d = this->devs;
+ while (info_d) {
+ struct mdinfo *d = info_d->next;
+
+ free(info_d);
+ info_d = d;
+ }
free(this);
this = rest;
break;
}
- memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
+ info_d->recovery_start = recovery_start;
- this->array.working_disks++;
+ if (info_d->recovery_start == MaxSector)
+ this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
if (d->devname)
strcpy(info_d->name, d->devname);
}
+ /* now that the disk list is up-to-date fixup recovery_start */
+ update_recovery_start(dev, this);
rest = this;
}
return 0;
}
-static int is_rebuilding(struct imsm_dev *dev)
-{
- struct imsm_map *migr_map;
-
- if (!dev->vol.migr_state)
- return 0;
-
- if (migr_type(dev) != MIGR_REBUILD)
- return 0;
-
- migr_map = get_imsm_map(dev, 1);
-
- if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
- return 1;
- else
- return 0;
-}
-
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
struct imsm_map *map = get_imsm_map(dev, 0);
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
+ __u32 blocks_per_unit;
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
}
-
+
if (consistent == 2 &&
- (!is_resync_complete(a) ||
+ (!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
dev->vol.migr_state))
consistent = 0;
- if (is_resync_complete(a)) {
+ if (is_resync_complete(&a->info)) {
/* complete intialization / resync,
* recovery and interrupted recovery is completed in
* ->set_disk
}
} else if (!is_resyncing(dev) && !failed) {
/* mark the start of the init process if nothing is failed */
- dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
+ dprintf("imsm: mark resync start\n");
if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
else
super->updates_pending++;
}
- /* FIXME check if we can update curr_migr_unit from resync_start */
+ /* check if we can update curr_migr_unit from resync_start, recovery_start */
+ blocks_per_unit = blocks_per_migr_unit(dev);
+ if (blocks_per_unit && failed <= 1) {
+ __u32 units32;
+ __u64 units;
+
+ if (migr_type(dev) == MIGR_REBUILD)
+ units = min_recovery_start(&a->info) / blocks_per_unit;
+ else
+ units = a->info.resync_start / blocks_per_unit;
+ units32 = units;
+
+ /* check that we did not overflow 32-bits, and that
+ * curr_migr_unit needs updating
+ */
+ if (units32 == units &&
+ __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
+ dprintf("imsm: mark checkpoint (%u)\n", units32);
+ dev->vol.curr_migr_unit = __cpu_to_le32(units32);
+ super->updates_pending++;
+ }
+ }
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {
- dprintf("imsm: mark '%s' (%llu)\n",
- consistent ? "clean" : "dirty", a->resync_start);
+ dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
if (consistent)
dev->vol.dirty = 0;
else
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
+ di->recovery_start = 0;
di->data_offset = __le32_to_cpu(map->pba_of_lba0);
di->component_size = a->info.component_size;
di->container_member = inst;