__u8 num_members; /* number of member disks */
__u8 reserved[3];
__u32 filler[7]; /* expansion area */
+#define IMSM_ORD_REBUILD (1 << 24)
__u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
- top byte special */
+ * top byte contains some flags
+ */
} __attribute__ ((packed));
struct imsm_vol {
struct imsm_update_create_array {
enum imsm_update_type type;
- struct imsm_dev dev;
int dev_idx;
+ struct imsm_dev dev; /* NOTE(review): presumably kept last because imsm_dev is variable-sized (cf. sizeof_imsm_dev()) - confirm */
};
static int imsm_env_devname_as_serial(void)
{
__u32 *ord_tbl = &map->disk_ord_tbl[slot];
- /* top byte is 'special' */
+ /* top byte identifies disk under rebuild
+ * why not just use the USABLE bit... oh well.
+ */
return __le32_to_cpu(*ord_tbl & ~(0xff << 24));
}
+/* Return the disk_ord_tbl entry for @slot in host byte order, with the
+ * top-byte flags (e.g. IMSM_ORD_REBUILD) left in place.
+ *
+ * NOTE(review): map 0 is consulted while dev->vol.migr_state is set and
+ * map 1 otherwise; get_imsm_map() is not visible here, so confirm that
+ * requesting map 1 with migr_state == 0 cannot yield NULL.
+ */
+static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot)
+{
+	struct imsm_map *map;
+
+	if (dev->vol.migr_state)
+		map = get_imsm_map(dev, 0);
+	else
+		map = get_imsm_map(dev, 1);
+
+	/* the table is stored little-endian; callers test host-order flag
+	 * bits against the returned value, so convert before returning
+	 */
+	return __le32_to_cpu(map->disk_ord_tbl[slot]);
+}
+
static int get_imsm_raid_level(struct imsm_map *map)
{
if (map->raid_level == 1) {
fprintf(stderr, "%s: failed for device %d:%d %s\n",
__func__, d->major, d->minor, strerror(errno));
*mpb = mpb_save;
- return 0;
+ return 1;
}
if (doclose) {
close(d->fd);
}
*mpb = mpb_save;
- return 1;
+ return 0;
}
static int write_super_imsm(struct intel_super *super, int doclose)
int spares = 0;
int raid_disks = 0;
int i;
+ __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
/* 'generation' is incremented everytime the metadata is written */
generation = __le32_to_cpu(mpb->generation_num);
else {
raid_disks++;
mpb->disk[d->index] = d->disk;
+ mpb_size += sizeof(struct imsm_disk);
}
}
if (raid_disks != mpb->num_disks) {
fprintf(stderr, "%s: expected %d disks only found %d\n",
__func__, mpb->num_disks, raid_disks);
- return 0;
+ return 1;
}
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
imsm_copy_dev(dev, super->dev_tbl[i]);
+ mpb_size += sizeof_imsm_dev(dev, 0);
}
+ mpb_size += __le32_to_cpu(mpb->bbm_log_size);
+ mpb->mpb_size = __cpu_to_le32(mpb_size);
/* recalculate checksum */
sum = __gen_imsm_checksum(mpb);
if (store_imsm_mpb(d->fd, super)) {
fprintf(stderr, "%s: failed for device %d:%d %s\n",
__func__, d->major, d->minor, strerror(errno));
- return 0;
+ return 1;
}
if (doclose) {
close(d->fd);
if (spares)
return write_super_imsm_spares(super, doclose);
- return 1;
+ return 0;
}
static int write_init_super_imsm(struct supertype *st)
this = malloc(sizeof(*this));
memset(this, 0, sizeof(*this));
this->next = rest;
- rest = this;
this->array.level = get_imsm_raid_level(map);
this->array.raid_disks = map->num_members;
this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
this->array.state = !vol->dirty;
this->container_member = i;
- if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ dev->vol.dirty || dev->vol.migr_state)
this->resync_start = 0;
else
this->resync_start = ~0ULL;
struct mdinfo *info_d;
struct dl *d;
int idx;
+ int skip;
__u32 s;
+ __u32 ord;
+ skip = 0;
idx = get_imsm_disk_idx(map, slot);
+ ord = get_imsm_ord_tbl_ent(dev, slot);
for (d = super->disks; d ; d = d->next)
if (d->index == idx)
break;
if (d == NULL)
- break; /* shouldn't this be continue ?? */
+ skip = 1;
+
+ s = d ? __le32_to_cpu(d->disk.status) : 0;
+ if (s & FAILED_DISK)
+ skip = 1;
+ if (!(s & USABLE_DISK))
+ skip = 1;
+ if (ord & IMSM_ORD_REBUILD)
+ skip = 1;
+
+ /*
+ * if we skip some disks the array will be assembled degraded;
+ * reset resync start to avoid a dirty-degraded situation
+ *
+ * FIXME handle dirty degraded
+ */
+ if (skip && !dev->vol.dirty)
+ this->resync_start = ~0ULL;
+ if (skip)
+ continue;
info_d = malloc(sizeof(*info_d));
- if (!info_d)
- break; /* ditto ?? */
+ if (!info_d) {
+ fprintf(stderr, Name ": failed to allocate disk"
+ " for volume %s\n", (char *) dev->volume);
+ free(this);
+ this = rest;
+ break;
+ }
memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
- s = __le32_to_cpu(d->disk.status);
-
info_d->disk.number = d->index;
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
- info_d->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
- info_d->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
- info_d->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
this->array.working_disks++;
if (d->devname)
strcpy(info_d->name, d->devname);
}
+ rest = this;
}
return rest;
disk = get_imsm_disk(super, idx);
if (__le32_to_cpu(disk->status) & FAILED_DISK)
failed++;
+ else if (!(__le32_to_cpu(disk->status) & USABLE_DISK))
+ failed++;
}
return failed;
}
-static void imsm_set_array_state(struct active_array *a, int consistent)
+static int imsm_set_array_state(struct active_array *a, int consistent)
{
int inst = a->info.container_member;
struct intel_super *super = a->container->sb;
failed = imsm_count_failed(super, map);
map_state = imsm_check_degraded(super, inst, failed);
+ if (consistent && !dev->vol.dirty &&
+ (dev->vol.migr_state || map_state != IMSM_T_STATE_NORMAL))
+ a->resync_start = 0ULL;
+ if (consistent == 2 && a->resync_start != ~0ULL)
+ consistent = 0;
+
if (a->resync_start == ~0ULL) {
/* complete recovery or initial resync */
if (map->map_state != map_state) {
dev->vol.migr_state = 1;
dev->vol.migr_type = failed ? 1 : 0;
dup_map(dev);
+ a->check_degraded = 1;
super->updates_pending++;
}
dev->vol.dirty = dirty;
super->updates_pending++;
}
+ return consistent;
}
static void imsm_set_disk(struct active_array *a, int n, int state)
super->updates_pending = 0;
}
+/* Try to re-use the disk that map 0 records for slot @idx of array @a.
+ *
+ * Returns the matching entry of super->disks, or NULL when no entry has
+ * the recorded index or the entry is flagged FAILED_DISK.
+ */
+static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	int i = get_imsm_disk_idx(map, idx);
+	struct dl *dl;
+
+	for (dl = super->disks; dl; dl = dl->next)
+		if (dl->index == i)
+			break;
+
+	/* the walk above leaves dl NULL when the disk is not in the list,
+	 * so the status word may only be read after checking the pointer
+	 */
+	if (dl && (__le32_to_cpu(dl->disk.status) & FAILED_DISK))
+		dl = NULL;
+
+	if (dl)
+		dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
+
+	return dl;
+}
+
+/* Search super->disks for a disk that can act as a spare for array @a.
+ *
+ * A candidate is passed over when it is already listed in a->info.devs,
+ * when its status has FAILED_DISK or CONFIGURED_DISK set, when its extent
+ * list cannot be obtained, or when no gap between consecutive extents
+ * covers a->info.component_size sectors starting at the array's
+ * pba_of_lba0.  @idx is not used by the search.
+ *
+ * Returns the first acceptable disk, or NULL when none qualifies.
+ */
+static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct active_array *a)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	/* start of the array data on each member; the same for every candidate */
+	__u32 array_start = __le32_to_cpu(map->pba_of_lba0);
+	unsigned long long pos;
+	struct mdinfo *d;
+	struct extent *ex;
+	int j;
+	int found;
+	__u32 status;
+	struct dl *dl;
+
+	for (dl = super->disks; dl; dl = dl->next) {
+		/* If in this array, skip */
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->disk.major == dl->major &&
+			    d->disk.minor == dl->minor) {
+				dprintf("%x:%x already in array\n", dl->major, dl->minor);
+				break;
+			}
+		if (d)
+			continue;
+
+		/* skip marked in use or failed drives */
+		status = __le32_to_cpu(dl->disk.status);
+		if (status & FAILED_DISK || status & CONFIGURED_DISK) {
+			dprintf("%x:%x status ( %s%s)\n",
+				dl->major, dl->minor,
+				status & FAILED_DISK ? "failed " : "",
+				status & CONFIGURED_DISK ? "configured " : "");
+			continue;
+		}
+
+		/* Does this unused device have the requisite free space?
+		 * We need a->info.component_size sectors
+		 */
+		ex = get_extents(super, dl);
+		if (!ex) {
+			dprintf("cannot get extents\n");
+			continue;
+		}
+		found = 0;
+		j = 0;
+		pos = 0;
+
+		do {
+			/* check that we can start at pba_of_lba0 with
+			 * a->info.component_size of space; pos is the end
+			 * of the previous extent, ex[j].start the next one
+			 */
+			if (array_start >= pos &&
+			    array_start + a->info.component_size < ex[j].start) {
+				found = 1;
+				break;
+			}
+			pos = ex[j].start + ex[j].size;
+			j++;
+
+		} while (ex[j-1].size); /* a zero-sized extent ends the list */
+
+		free(ex);
+		if (found)
+			break;
+
+		/* No room */
+		dprintf("%x:%x does not have %llu at %d\n",
+			dl->major, dl->minor,
+			a->info.component_size,
+			array_start);
+	}
+
+	return dl;
+}
+
static struct mdinfo *imsm_activate_spare(struct active_array *a,
struct metadata_update **updates)
{
return NULL;
/* For each slot, if it is not working, find a spare */
- dl = super->disks;
for (i = 0; i < a->info.array.raid_disks; i++) {
for (d = a->info.devs ; d ; d = d->next)
if (d->disk.raid_disk == i)
if (d && (d->state_fd >= 0))
continue;
- /* OK, this device needs recovery. Find a spare */
- for ( ; dl ; dl = dl->next) {
- unsigned long long esize;
- unsigned long long pos;
- struct mdinfo *d2;
- struct extent *ex;
- int j;
- int found;
- __u32 array_start;
-
- /* If in this array, skip */
- for (d2 = a->info.devs ; d2 ; d2 = d2->next)
- if (d2->disk.major == dl->major &&
- d2->disk.minor == dl->minor) {
- dprintf("%x:%x already in array\n", dl->major, dl->minor);
- break;
- }
- if (d2)
- continue;
-
- /* Does this unused device have the requisite free space?
- * We need a->info.component_size sectors
- */
- ex = get_extents(super, dl);
- if (!ex) {
- dprintf("cannot get extents\n");
- continue;
- }
- found = 0;
- j = 0;
- pos = 0;
- array_start = __le32_to_cpu(map->pba_of_lba0);
-
- do {
- /* check that we can start at pba_of_lba0 with
- * a->info.component_size of space
- */
- esize = ex[j].start - pos;
- if (array_start >= pos &&
- array_start + a->info.component_size < ex[j].start) {
- found = 1;
- break;
- }
- pos = ex[j].start + ex[j].size;
- j++;
-
- } while (ex[j-1].size);
-
- free(ex);
- if (!found) {
- dprintf("%x:%x does not have %llu at %d\n",
- dl->major, dl->minor,
- a->info.component_size,
- __le32_to_cpu(map->pba_of_lba0));
- /* No room */
- continue;
- }
-
- /* found a usable disk with enough space */
- di = malloc(sizeof(*di));
- memset(di, 0, sizeof(*di));
-
- /* dl->index will be -1 in the case we are activating a
- * pristine spare. imsm_process_update() will create a
- * new index in this case. Once a disk is found to be
- * failed in all member arrays it is kicked from the
- * metadata
- */
- di->disk.number = dl->index;
+ /*
+ * OK, this device needs recovery. Try to re-add the previous
+ * occupant of this slot, if this fails add a new spare
+ */
+ dl = imsm_readd(super, i, a);
+ if (!dl)
+ dl = imsm_add_spare(super, i, a);
+ if (!dl)
+ continue;
+
+ /* found a usable disk with enough space */
+ di = malloc(sizeof(*di));
+ memset(di, 0, sizeof(*di));
+
+ /* dl->index will be -1 in the case we are activating a
+ * pristine spare. imsm_process_update() will create a
+ * new index in this case. Once a disk is found to be
+ * failed in all member arrays it is kicked from the
+ * metadata
+ */
+ di->disk.number = dl->index;
- /* (ab)use di->devs to store a pointer to the device
- * we chose
- */
- di->devs = (struct mdinfo *) dl;
-
- di->disk.raid_disk = i;
- di->disk.major = dl->major;
- di->disk.minor = dl->minor;
- di->disk.state = 0;
- di->data_offset = array_start;
- di->component_size = a->info.component_size;
- di->container_member = inst;
- di->next = rv;
- rv = di;
- num_spares++;
- dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
- i, pos);
+ /* (ab)use di->devs to store a pointer to the device
+ * we chose
+ */
+ di->devs = (struct mdinfo *) dl;
+
+ di->disk.raid_disk = i;
+ di->disk.major = dl->major;
+ di->disk.minor = dl->minor;
+ di->disk.state = 0;
+ di->data_offset = __le32_to_cpu(map->pba_of_lba0);
+ di->component_size = a->info.component_size;
+ di->container_member = inst;
+ di->next = rv;
+ rv = di;
+ num_spares++;
+ dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+ i, di->data_offset);
- break;
- }
+ break;
}
if (!rv)
/* check update is next in sequence */
if (u->dev_idx != mpb->num_raid_devs) {
- dprintf("%s: can not create arrays out of sequence\n",
- __func__);
+ dprintf("%s: can not create array %d expected index %d\n",
+ __func__, u->dev_idx, mpb->num_raid_devs);
return;
}