+/* Try to find the disk that the metadata records for slot 'idx' of array
+ * 'a' so it can be put back into the array.
+ *
+ * Returns the entry of super->disks whose index matches the slot's disk
+ * index, or NULL when there is no such entry or the entry is flagged
+ * FAILED_DISK.
+ */
+static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	int wanted = get_imsm_disk_idx(dev, idx);
+	struct dl *cand = super->disks;
+
+	/* walk the container's disk list until the index matches */
+	while (cand && cand->index != wanted)
+		cand = cand->next;
+
+	if (!cand)
+		return NULL;
+
+	/* a failed former occupant is of no use */
+	if (__le32_to_cpu(cand->disk.status) & FAILED_DISK)
+		return NULL;
+
+	dprintf("%s: found %x:%x\n", __func__, cand->major, cand->minor);
+	return cand;
+}
+
+/* Search super->disks for a disk that can take over 'slot' of array 'a'.
+ *
+ * A candidate is rejected when it
+ *   - is already an active member of 'a' (matching major:minor with an
+ *     open state_fd),
+ *   - is flagged FAILED_DISK or is the disk currently recorded for 'slot',
+ *   - has no extent list, or
+ *   - has no free gap that holds a->info.component_size sectors starting
+ *     at the array's pba_of_lba0.
+ *
+ * Returns the first acceptable disk, or NULL when the list is exhausted.
+ */
+static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	int idx = get_imsm_disk_idx(dev, slot);
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	/* the required start offset is the same for every candidate */
+	__u32 array_start = __le32_to_cpu(map->pba_of_lba0);
+	unsigned long long pos;
+	struct mdinfo *d;
+	struct extent *ex;
+	int j;
+	int found;
+	__u32 status;
+	struct dl *dl;
+
+	for (dl = super->disks; dl; dl = dl->next) {
+		/* If in this array, skip */
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->state_fd >= 0 &&
+			    d->disk.major == dl->major &&
+			    d->disk.minor == dl->minor) {
+				dprintf("%x:%x already in array\n", dl->major, dl->minor);
+				break;
+			}
+		if (d)
+			continue;
+
+		/* skip in use or failed drives */
+		status = __le32_to_cpu(dl->disk.status);
+		if (status & FAILED_DISK || idx == dl->index) {
+			dprintf("%x:%x status ( %s%s)\n",
+				dl->major, dl->minor,
+				status & FAILED_DISK ? "failed " : "",
+				idx == dl->index ? "in use " : "");
+			continue;
+		}
+
+		/* Does this unused device have the requisite free space?
+		 * We need a->info.component_size sectors
+		 */
+		ex = get_extents(super, dl);
+		if (!ex) {
+			dprintf("cannot get extents\n");
+			continue;
+		}
+		found = 0;
+		j = 0;
+		pos = 0;
+
+		/* 'pos' is the end of the previous extent, so the span from
+		 * 'pos' up to ex[j].start is the gap being tested.  The walk
+		 * stops once an entry with size 0 has been examined.
+		 */
+		do {
+			/* check that we can start at pba_of_lba0 with
+			 * a->info.component_size of space
+			 */
+			if (array_start >= pos &&
+			    array_start + a->info.component_size < ex[j].start) {
+				found = 1;
+				break;
+			}
+			pos = ex[j].start + ex[j].size;
+			j++;
+
+		} while (ex[j-1].size);
+
+		free(ex);
+		if (found)
+			break;
+
+		/* No room */
+		dprintf("%x:%x does not have %llu at %u\n",
+			dl->major, dl->minor,
+			a->info.component_size,
+			array_start);
+	}
+
+	return dl;
+}
+
+static struct mdinfo *imsm_activate_spare(struct active_array *a,
+					  struct metadata_update **updates)
+{
+	/**
+	 * Find a device with unused free space and use it to replace a
+	 * failed/vacant region in an array.  We replace failed regions one
+	 * array at a time.  The result is that a new spare disk will be added
+	 * to the first failed array and after the monitor has finished
+	 * propagating failures the remainder will be consumed.
+	 *
+	 * Returns a list of mdinfo records describing the chosen devices (or
+	 * NULL when nothing can be done) and, on success, pushes a matching
+	 * update_activate_spare record onto *updates.
+	 *
+	 * FIXME add a capability for mdmon to request spares from another
+	 * container.
+	 */
+
+	struct intel_super *super = a->container->sb;
+	int inst = a->info.container_member;
+	struct imsm_dev *dev = get_imsm_dev(super, inst);
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	int failed = a->info.array.raid_disks;
+	struct mdinfo *rv = NULL;
+	struct mdinfo *member;
+	struct mdinfo *di;
+	struct metadata_update *mu;
+	struct dl *dl;
+	struct imsm_update_activate_spare *u;
+	int num_spares = 0;
+	int slot;
+
+	/* every member with an open state_fd is one less failure */
+	for (member = a->info.devs; member; member = member->next) {
+		if (member->state_fd < 0)
+			continue;
+		if (member->curr_state & DS_FAULTY)
+			/* wait for Removal to happen */
+			return NULL;
+		failed--;
+	}
+
+	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
+		inst, failed, a->info.array.raid_disks, a->info.array.level);
+	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
+		return NULL;
+
+	/* For each slot, if it is not working, find a spare */
+	for (slot = 0; slot < a->info.array.raid_disks; slot++) {
+		member = a->info.devs;
+		while (member && member->disk.raid_disk != slot)
+			member = member->next;
+		dprintf("found %d: %p %x\n", slot, member,
+			member?member->curr_state:0);
+		if (member && (member->state_fd >= 0))
+			continue;
+
+		/*
+		 * OK, this device needs recovery.  Try to re-add the previous
+		 * occupant of this slot, if this fails add a new spare
+		 */
+		dl = imsm_readd(super, slot, a);
+		if (!dl)
+			dl = imsm_add_spare(super, slot, a);
+		if (!dl)
+			continue;
+
+		/* found a usable disk with enough space */
+		di = calloc(1, sizeof(*di));
+		if (!di)
+			continue;
+
+		/* dl->index will be -1 in the case we are activating a
+		 * pristine spare.  imsm_process_update() will create a
+		 * new index in this case.  Once a disk is found to be
+		 * failed in all member arrays it is kicked from the
+		 * metadata
+		 */
+		di->disk.number = dl->index;
+
+		/* (ab)use di->devs to store a pointer to the device
+		 * we chose
+		 */
+		di->devs = (struct mdinfo *) dl;
+
+		di->disk.raid_disk = slot;
+		di->disk.major = dl->major;
+		di->disk.minor = dl->minor;
+		di->disk.state = 0;
+		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
+		di->component_size = a->info.component_size;
+		di->container_member = inst;
+
+		/* push onto the front of the result list */
+		di->next = rv;
+		rv = di;
+		num_spares++;
+		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+			slot, di->data_offset);
+
+		/* only one slot is handled per call */
+		break;
+	}
+
+	if (!rv)
+		/* No spares found */
+		return rv;
+
+	/* Now 'rv' has a list of devices to return.
+	 * Create a metadata_update record to update the
+	 * disk_ord_tbl for the array
+	 */
+	mu = malloc(sizeof(*mu));
+	if (mu) {
+		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
+		if (mu->buf == NULL) {
+			free(mu);
+			mu = NULL;
+		}
+	}
+	if (!mu) {
+		/* could not build the update: give the device list back */
+		while (rv) {
+			di = rv->next;
+			free(rv);
+			rv = di;
+		}
+		return NULL;
+	}
+
+	mu->space = NULL;
+	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
+	mu->next = *updates;
+
+	/* one record per chosen device, chained through ->next with the
+	 * last record terminating the chain
+	 */
+	u = (struct imsm_update_activate_spare *) mu->buf;
+	for (di = rv; di; di = di->next, u++) {
+		u->type = update_activate_spare;
+		u->dl = (struct dl *) di->devs;
+		di->devs = NULL;
+		u->slot = di->disk.raid_disk;
+		u->array = inst;
+		u->next = di->next ? u + 1 : NULL;
+	}
+	*updates = mu;
+
+	return rv;
+}
+
+/* Report whether two raid devices share at least one member disk.
+ * Compares the disk index of every slot in d1's first map against every
+ * slot in d2's first map; returns 1 on the first match, 0 otherwise.
+ */
+static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2)
+{
+	struct imsm_map *m1 = get_imsm_map(d1, 0);
+	struct imsm_map *m2 = get_imsm_map(d2, 0);
+	int s1;
+	int s2;
+
+	for (s1 = 0; s1 < m1->num_members; s1++) {
+		int disk = get_imsm_disk_idx(d1, s1);
+
+		s2 = 0;
+		while (s2 < m2->num_members) {
+			if (get_imsm_disk_idx(d2, s2) == disk)
+				return 1;
+			s2++;
+		}
+	}
+
+	return 0;
+}
+
+static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);
+
+static void imsm_process_update(struct supertype *st,
+				struct metadata_update *update)
+{
+	/**
+	 * crack open the metadata_update envelope to find the update record
+	 * update can be one of:
+	 *	update_activate_spare - a spare device has replaced a failed
+	 *	device in an array, update the disk_ord_tbl.  If this disk is
+	 *	present in all member arrays then also clear the SPARE_DISK
+	 *	flag
+	 *	update_create_array - append a new raid device to the
+	 *	metadata, provided it is next in sequence and does not
+	 *	collide with an existing array
+	 *	update_add_disk - move the disks queued on super->add onto
+	 *	super->disks and flag every array for a degraded check
+	 *
+	 * If a larger metadata buffer was prepared (super->next_buf) it is
+	 * swapped in before the update is applied; if one was required but
+	 * could not be allocated the update is dropped.
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb;
+	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
+
+	/* update requires a larger buf but the allocation failed */
+	if (super->next_len && !super->next_buf) {
+		super->next_len = 0;
+		return;
+	}
+
+	if (super->next_buf) {
+		memcpy(super->next_buf, super->buf, super->len);
+		free(super->buf);
+		super->len = super->next_len;
+		super->buf = super->next_buf;
+
+		super->next_len = 0;
+		super->next_buf = NULL;
+	}
+
+	mpb = super->anchor;
+
+	switch (type) {
+	case update_activate_spare: {
+		struct imsm_update_activate_spare *u = (void *) update->buf;
+		struct imsm_dev *dev = get_imsm_dev(super, u->array);
+		struct imsm_map *map = get_imsm_map(dev, 0);
+		struct imsm_map *migr_map;
+		struct active_array *a;
+		struct imsm_disk *disk;
+		__u32 status;
+		__u8 to_state;
+		struct dl *dl;
+		unsigned int found;
+		int failed;
+		int victim = get_imsm_disk_idx(dev, u->slot);
+		int i;
+
+		/* only accept a disk that is on the container's list */
+		for (dl = super->disks; dl; dl = dl->next)
+			if (dl == u->dl)
+				break;
+
+		if (!dl) {
+			/* NOTE(review): u->dl was not found on super->disks
+			 * yet is still dereferenced for the message; confirm
+			 * the pointer is always valid here
+			 */
+			fprintf(stderr, "error: imsm_activate_spare passed "
+				"an unknown disk (index: %d)\n",
+				u->dl->index);
+			return;
+		}
+
+		super->updates_pending++;
+
+		/* count failures (excluding rebuilds and the victim)
+		 * to determine map[0] state
+		 */
+		failed = 0;
+		for (i = 0; i < map->num_members; i++) {
+			if (i == u->slot)
+				continue;
+			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
+			if (!disk ||
+			    __le32_to_cpu(disk->status) & FAILED_DISK)
+				failed++;
+		}
+
+		/* adding a pristine spare, assign a new index */
+		if (dl->index < 0) {
+			dl->index = super->anchor->num_disks;
+			super->anchor->num_disks++;
+		}
+		disk = &dl->disk;
+		status = __le32_to_cpu(disk->status);
+		status |= CONFIGURED_DISK;
+		status &= ~SPARE_DISK;
+		disk->status = __cpu_to_le32(status);
+
+		/* mark rebuild */
+		to_state = imsm_check_degraded(super, dev, failed);
+		map->map_state = IMSM_T_STATE_DEGRADED;
+		migrate(dev, to_state, 1);
+		migr_map = get_imsm_map(dev, 1);
+		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
+		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
+
+		/* count arrays using the victim in the metadata; each array
+		 * is scanned over the member count of its own map
+		 */
+		found = 0;
+		for (a = st->arrays; a ; a = a->next) {
+			struct imsm_dev *adev;
+			struct imsm_map *amap;
+
+			adev = get_imsm_dev(super, a->info.container_member);
+			amap = get_imsm_map(adev, 0);
+			for (i = 0; i < amap->num_members; i++)
+				if (victim == get_imsm_disk_idx(adev, i))
+					found++;
+		}
+
+		/* delete the victim if it is no longer being
+		 * utilized anywhere
+		 */
+		if (!found) {
+			struct dl **dlp;
+
+			/* We know that 'manager' isn't touching anything,
+			 * so it is safe to delete
+			 */
+			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
+				if ((*dlp)->index == victim)
+					break;
+
+			/* victim may be on the missing list */
+			if (!*dlp)
+				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
+					if ((*dlp)->index == victim)
+						break;
+			imsm_delete(super, dlp, victim);
+		}
+		break;
+	}
+	case update_create_array: {
+		/* someone wants to create a new array, we need to be aware of
+		 * a few races/collisions:
+		 * 1/ 'Create' called by two separate instances of mdadm
+		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
+		 *    devices that have since been assimilated via
+		 *    activate_spare.
+		 * In the event this update can not be carried out mdadm will
+		 * (FIX ME) notice that its update did not take hold.
+		 */
+		struct imsm_update_create_array *u = (void *) update->buf;
+		struct imsm_dev *dev;
+		struct imsm_map *map, *new_map;
+		unsigned long long start, end;
+		unsigned long long new_start, new_end;
+		int i;
+
+		/* handle racing creates: first come first serve */
+		if (u->dev_idx < mpb->num_raid_devs) {
+			dprintf("%s: subarray %d already defined\n",
+				__func__, u->dev_idx);
+			return;
+		}
+
+		/* check update is next in sequence */
+		if (u->dev_idx != mpb->num_raid_devs) {
+			dprintf("%s: can not create array %d expected index %d\n",
+				__func__, u->dev_idx, mpb->num_raid_devs);
+			return;
+		}
+
+		new_map = get_imsm_map(&u->dev, 0);
+		new_start = __le32_to_cpu(new_map->pba_of_lba0);
+		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
+
+		/* handle activate_spare versus create race:
+		 * check to make sure that overlapping arrays do not include
+		 * overlapping disks
+		 */
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			int overlap;
+
+			dev = get_imsm_dev(super, i);
+			map = get_imsm_map(dev, 0);
+			start = __le32_to_cpu(map->pba_of_lba0);
+			end = start + __le32_to_cpu(map->blocks_per_member);
+
+			/* evaluated afresh for every array so that a range
+			 * collision with one array is not carried over to
+			 * the arrays that follow it
+			 */
+			overlap = (new_start >= start && new_start <= end) ||
+				  (start >= new_start && start <= new_end);
+			if (overlap && disks_overlap(dev, &u->dev)) {
+				dprintf("%s: arrays overlap\n", __func__);
+				return;
+			}
+		}
+		/* check num_members sanity */
+		if (new_map->num_members > mpb->num_disks) {
+			dprintf("%s: num_disks out of range\n", __func__);
+			return;
+		}
+
+		/* check that prepare update was successful */
+		if (!update->space) {
+			dprintf("%s: prepare update failed\n", __func__);
+			return;
+		}
+
+		/* take ownership of the space reserved by prepare_update and
+		 * fill it with the new device before looking at its map
+		 */
+		super->updates_pending++;
+		dev = update->space;
+		update->space = NULL;
+		imsm_copy_dev(dev, &u->dev);
+		map = get_imsm_map(dev, 0);
+		super->dev_tbl[u->dev_idx] = dev;
+		mpb->num_raid_devs++;
+
+		/* fix up flags */
+		for (i = 0; i < map->num_members; i++) {
+			struct imsm_disk *disk;
+			__u32 status;
+
+			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
+			if (!disk)
+				/* no disk entry for this slot, nothing to flag */
+				continue;
+			status = __le32_to_cpu(disk->status);
+			status |= CONFIGURED_DISK;
+			status &= ~SPARE_DISK;
+			disk->status = __cpu_to_le32(status);
+		}
+		break;
+	}
+	case update_add_disk:
+
+		/* we may be able to repair some arrays if disks are
+		 * being added */
+		if (super->add) {
+			struct active_array *a;
+
+			super->updates_pending++;
+			for (a = st->arrays; a; a = a->next)
+				a->check_degraded = 1;
+		}
+		/* add some spares to the metadata */
+		while (super->add) {
+			struct dl *al;
+
+			al = super->add;
+			super->add = al->next;
+			al->next = super->disks;
+			super->disks = al;
+			dprintf("%s: added %x:%x\n",
+				__func__, al->major, al->minor);
+		}
+
+		break;
+	}
+}
+
+static void imsm_prepare_update(struct supertype *st,
+				struct metadata_update *update)
+{
+	/**
+	 * Allocate space to hold new disk entries, raid-device entries or a new
+	 * mpb if necessary.  The manager synchronously waits for updates to
+	 * complete in the monitor, so new mpb buffers allocated here can be
+	 * integrated by the monitor thread without worrying about live pointers
+	 * in the manager thread.
+	 *
+	 * For update_create_array, update->space receives a buffer sized for
+	 * the new raid device (left NULL if malloc fails).  When the metadata
+	 * would outgrow the current buffer, super->next_len is set to the new
+	 * size and super->next_buf to a 512-byte aligned buffer of that size,
+	 * or to NULL if the allocation fails.
+	 */
+	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	size_t buf_len;
+	size_t len = 0;
+
+	switch (type) {
+	case update_create_array: {
+		struct imsm_update_create_array *u = (void *) update->buf;
+
+		len = sizeof_imsm_dev(&u->dev, 1);
+		update->space = malloc(len);
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* check if we need a larger metadata buffer */
+	if (super->next_buf)
+		buf_len = super->next_len;
+	else
+		buf_len = super->len;
+
+	if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
+		/* ok we need a larger buf than what is currently allocated
+		 * if this allocation fails process_update will notice that
+		 * ->next_len is set and ->next_buf is NULL
+		 */
+		buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
+
+		/* drop any smaller buffer prepared by an earlier update */
+		free(super->next_buf);
+		super->next_buf = NULL;
+
+		super->next_len = buf_len;
+		/* posix_memalign takes (memptr, alignment, size) */
+		if (posix_memalign(&super->next_buf, 512, buf_len) != 0)
+			super->next_buf = NULL;
+	}
+}
+
+/* Remove disk 'index' from the metadata and close the gap it leaves.
+ *
+ * Every disk index above 'index' (on super->disks and super->missing, and
+ * in the ord tables of every raid device) is lowered by one, the anchor's
+ * disk count is decremented and, if *dlp is set, that list entry is
+ * unlinked and freed.
+ *
+ * must be called while manager is quiesced
+ */
+static void imsm_delete(struct intel_super *super, struct dl **dlp, int index)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct dl *cur;
+	struct dl *doomed;
+	int d;
+	int slot;
+
+	dprintf("%s: deleting device[%d] from imsm_super\n",
+		__func__, index);
+
+	/* shift all indexes down one */
+	for (cur = super->disks; cur; cur = cur->next)
+		if (cur->index > index)
+			cur->index--;
+	for (cur = super->missing; cur; cur = cur->next)
+		if (cur->index > index)
+			cur->index--;
+
+	for (d = 0; d < mpb->num_raid_devs; d++) {
+		struct imsm_dev *dev = get_imsm_dev(super, d);
+		int members = get_imsm_map(dev, 0)->num_members;
+
+		for (slot = 0; slot < members; slot++) {
+			__u32 ord = get_imsm_ord_tbl_ent(dev, slot);
+			struct imsm_map *second;
+
+			/* entries at or below the deleted index stay put */
+			if (ord_to_idx(ord) <= index)
+				continue;
+
+			/* update ord entries being careful not to propagate
+			 * ord-flags to the first map
+			 */
+			set_imsm_ord_tbl_ent(get_imsm_map(dev, 0), slot,
+					     ord_to_idx(ord - 1));
+			second = get_imsm_map(dev, 1);
+			if (second)
+				set_imsm_ord_tbl_ent(second, slot, ord - 1);
+		}
+	}
+
+	mpb->num_disks--;
+	super->updates_pending++;
+
+	/* unlink and release the list entry, if the caller found one */
+	doomed = *dlp;
+	if (doomed) {
+		*dlp = doomed->next;
+		__free_imsm_disk(doomed);
+	}
+}
+#endif /* MDASSEMBLE */
+