X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=super-intel.c;h=3249b2ccbd6c59f66646c389709737f98e3be686;hp=c96793f562cd5e568516501790a9eacbf498f416;hb=ff54de6e47163944185f231700e72d3122b58f4c;hpb=ae6aad82398085e07c922fbfa667a5f6ec5dd7e6 diff --git a/super-intel.c b/super-intel.c index c96793f5..3249b2cc 100644 --- a/super-intel.c +++ b/super-intel.c @@ -17,8 +17,10 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define HAVE_STDINT_H 1 #include "mdadm.h" #include "mdmon.h" +#include "sha1.h" #include #include #include @@ -160,6 +162,8 @@ struct intel_super { struct imsm_super *anchor; /* immovable parameters */ }; size_t len; /* size of the 'buf' allocation */ + void *next_buf; /* for realloc'ing buf from the manager */ + size_t next_len; int updates_pending; /* count of pending updates for mdmon */ int creating_imsm; /* flag to indicate container creation */ int current_vol; /* index of raid device undergoing creation */ @@ -174,7 +178,8 @@ struct intel_super { struct imsm_disk disk; int fd; } *disks; - struct bbm_log *bbm_log; + struct dl *add; /* list of disks to add while mdmon active */ + struct bbm_log *bbm_log; }; struct extent { @@ -185,6 +190,7 @@ struct extent { enum imsm_update_type { update_activate_spare, update_create_array, + update_add_disk, }; struct imsm_update_activate_spare { @@ -201,6 +207,10 @@ struct imsm_update_create_array { struct imsm_dev dev; }; +struct imsm_update_add_disk { + enum imsm_update_type type; +}; + static int imsm_env_devname_as_serial(void) { char *val = getenv("IMSM_DEVNAME_AS_SERIAL"); @@ -230,10 +240,12 @@ static struct supertype *match_metadata_desc_imsm(char *arg) return st; } +#ifndef MDASSEMBLE static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } +#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -245,6 +257,7 @@ static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) return &mpb->disk[index]; } +#ifndef MDASSEMBLE /* retrieve a disk from the parsed metadata */ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) { @@ -256,6 +269,7 @@ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) return NULL; } +#endif /* generate a checksum directly from the anchor when the anchor is known to be * up-to-date, currently only at load or write_super after coalescing @@ -337,26 +351,30 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return super->dev_tbl[index]; } -static __u32 get_imsm_disk_idx(struct imsm_map *map, int slot) -{ - __u32 *ord_tbl = &map->disk_ord_tbl[slot]; - - /* top byte identifies disk under rebuild - * why not just use the USABLE bit... oh well. - */ - return __le32_to_cpu(*ord_tbl & ~(0xff << 24)); -} - static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) { struct imsm_map *map; if (dev->vol.migr_state) - map = get_imsm_map(dev, 0); - else map = get_imsm_map(dev, 1); + else + map = get_imsm_map(dev, 0); - return map->disk_ord_tbl[slot]; + /* top byte identifies disk under rebuild */ + return __le32_to_cpu(map->disk_ord_tbl[slot]); +} + +#define ord_to_idx(ord) (((ord) << 8) >> 8) +static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot) +{ + __u32 ord = get_imsm_ord_tbl_ent(dev, slot); + + return ord_to_idx(ord); +} + +static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord) +{ + map->disk_ord_tbl[slot] = __cpu_to_le32(ord); } static int get_imsm_raid_level(struct imsm_map *map) @@ -371,6 +389,7 @@ static int get_imsm_raid_level(struct imsm_map *map) return map->raid_level; } +#ifndef MDASSEMBLE static int cmp_extent(const void *av, const void *bv) { const struct extent *a = av; @@ -394,7 +413,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) struct imsm_map *map = get_imsm_map(dev, 0); for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(map, j); + __u32 index = get_imsm_disk_idx(dev, j); if (index == dl->index) memberships++; @@ -410,7 +429,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) struct imsm_map *map = get_imsm_map(dev, 0); for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(map, j); + __u32 index = get_imsm_disk_idx(dev, j); if (index == dl->index) { e->start = __le32_to_cpu(map->pba_of_lba0); @@ -427,23 +446,25 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) return rv; } -#ifndef MDASSEMBLE static void print_imsm_dev(struct imsm_dev *dev, int index) { __u64 sz; int slot; struct imsm_map *map = get_imsm_map(dev, 0); + __u32 ord; printf("\n"); printf("[%s]:\n", dev->volume); printf(" RAID Level : %d\n", get_imsm_raid_level(map)); printf(" Members : %d\n", map->num_members); for (slot = 0; slot < map->num_members; slot++) - if (index == get_imsm_disk_idx(map, slot)) + if (index == get_imsm_disk_idx(dev, slot)) break; - if (slot < map->num_members) - printf(" This Slot : %d\n", slot); - else + if (slot < map->num_members) { + ord = get_imsm_ord_tbl_ent(dev, slot); + printf(" This Slot : %d%s\n", slot, + ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); + } else printf(" This Slot : ?\n"); sz = __le32_to_cpu(dev->size_high); sz <<= 32; @@ -467,7 +488,7 @@ static void print_imsm_dev(struct imsm_dev *dev, int index) printf(" Map State : %s", map_state_str[map->map_state]); if (dev->vol.migr_state) { struct imsm_map *map = get_imsm_map(dev, 1); - printf(", %s", map_state_str[map->map_state]); + printf(" <-- %s", map_state_str[map->map_state]); } printf("\n"); printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); @@ -476,7 +497,7 @@ static void print_imsm_dev(struct imsm_dev *dev, int index) static void print_imsm_disk(struct imsm_super *mpb, int index) { struct imsm_disk *disk = __get_imsm_disk(mpb, index); - char str[MAX_RAID_SERIAL_LEN]; + char str[MAX_RAID_SERIAL_LEN + 1]; __u32 s; __u64 sz; @@ -484,7 +505,7 @@ static void print_imsm_disk(struct imsm_super *mpb, int index) return; printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN, "%s", disk->serial); + snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); printf(" Disk%02d Serial : %s\n", index, str); s = __le32_to_cpu(disk->status); printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "", @@ -539,9 +560,18 @@ static void examine_super_imsm(struct supertype *st, char *homehost) } } +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info); + static void brief_examine_super_imsm(struct supertype *st) { - printf("ARRAY /dev/imsm metadata=imsm\n"); + /* We just write a generic DDF ARRAY entry + */ + struct mdinfo info; + char nbuf[64]; + + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf("ARRAY /dev/imsm metadata=imsm UUID=%s\n", nbuf + 5); } static void detail_super_imsm(struct supertype *st, char *homehost) @@ -551,7 +581,11 @@ static void detail_super_imsm(struct supertype *st, char *homehost) static void brief_detail_super_imsm(struct supertype *st) { - printf("%s\n", __FUNCTION__); + struct mdinfo info; + char nbuf[64]; + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf(" UUID=%s", nbuf + 5); } #endif @@ -559,12 +593,51 @@ static int match_home_imsm(struct supertype *st, char *homehost) { printf("%s\n", __FUNCTION__); - return 0; + return -1; } static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) { - printf("%s\n", __FUNCTION__); + /* The uuid returned here is used for: + * uuid to put into bitmap file (Create, Grow) + * uuid for backup header when saving critical section (Grow) + * comparing uuids when re-adding a device into an array + * In these cases the uuid required is that of the data-array, + * not the device-set. + * uuid to recognise same set when adding a missing device back + * to an array. This is a uuid for the device-set. + * + * For each of these we can make do with a truncated + * or hashed uuid rather than the original, as long as + * everyone agrees. + * In each case the uuid required is that of the data-array, + * not the device-set. + */ + /* imsm does not track uuid's so we synthesis one using sha1 on + * - The signature (Which is constant for all imsm array, but no matter) + * - the family_num of the container + * - the index number of the volume + * - the 'serial' number of the volume. + * Hopefully these are all constant. + */ + struct intel_super *super = st->sb; + + char buf[20]; + struct sha1_ctx ctx; + struct imsm_dev *dev = NULL; + + sha1_init_ctx(&ctx); + sha1_process_bytes(super->anchor->sig, MAX_SIGNATURE_LENGTH, &ctx); + sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx); + if (super->current_vol >= 0) + dev = get_imsm_dev(super, super->current_vol); + if (dev) { + __u32 vol = super->current_vol; + sha1_process_bytes(&vol, sizeof(vol), &ctx); + sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx); + } + sha1_finish_ctx(&ctx, buf); + memcpy(uuid, buf, 4*4); } #if 0 @@ -623,17 +696,32 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) info->array.md_minor = -1; info->array.ctime = 0; info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512); + info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; + info->array.state = !dev->vol.dirty; + + info->disk.major = 0; + info->disk.minor = 0; info->data_offset = __le32_to_cpu(map->pba_of_lba0); info->component_size = __le32_to_cpu(map->blocks_per_member); + memset(info->uuid, 0, sizeof(info->uuid)); - info->disk.major = 0; - info->disk.minor = 0; + if (map->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty || dev->vol.migr_state) + info->resync_start = 0; + else + info->resync_start = ~0ULL; + + strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); + info->name[MAX_RAID_SERIAL_LEN] = 0; + info->array.major_version = -1; + info->array.minor_version = -2; sprintf(info->text_version, "/%s/%d", devnum2devname(st->container_dev), info->container_member); + info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */ + uuid_from_super_imsm(st, info->uuid); } @@ -663,9 +751,13 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->disk.minor = 0; info->disk.raid_disk = -1; info->reshape_active = 0; + info->array.major_version = -1; + info->array.minor_version = -2; strcpy(info->text_version, "imsm"); + info->safe_mode_delay = 0; info->disk.number = -1; info->disk.state = 0; + info->name[0] = 0; if (super->disks) { disk = &super->disks->disk; @@ -679,6 +771,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0; } + uuid_from_super_imsm(st, info->uuid); } static int update_super_imsm(struct supertype *st, struct mdinfo *info, @@ -777,10 +870,15 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst) sec->anchor->num_raid_devs > 0) { if (first->anchor->family_num != sec->anchor->family_num) return 3; - if (first->anchor->mpb_size != sec->anchor->mpb_size) - return 3; - if (first->anchor->check_sum != sec->anchor->check_sum) - return 3; + } + + /* if 'first' is a spare promote it to a populated mpb with sec's + * family number + */ + if (first->anchor->num_raid_devs == 0 && + sec->anchor->num_raid_devs > 0) { + first->anchor->num_raid_devs = sec->anchor->num_raid_devs; + first->anchor->family_num = sec->anchor->family_num; } return 0; @@ -819,20 +917,19 @@ static int imsm_read_serial(int fd, char *devname, unsigned char scsi_serial[255]; int rv; int rsp_len; - int i, cnt; + int len; + char *c, *rsp_buf; memset(scsi_serial, 0, sizeof(scsi_serial)); - if (imsm_env_devname_as_serial()) { - char name[MAX_RAID_SERIAL_LEN]; - - fd2devname(fd, name); - strcpy((char *) serial, name); + rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); + + if (rv && imsm_env_devname_as_serial()) { + memset(serial, 0, MAX_RAID_SERIAL_LEN); + fd2devname(fd, (char *) serial); return 0; } - rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); - if (rv != 0) { if (devname) fprintf(stderr, @@ -841,19 +938,34 @@ static int imsm_read_serial(int fd, char *devname, return rv; } + /* trim whitespace */ rsp_len = scsi_serial[3]; - for (i = 0, cnt = 0; i < rsp_len; i++) { - if (!isspace(scsi_serial[4 + i])) - serial[cnt++] = scsi_serial[4 + i]; - if (cnt == MAX_RAID_SERIAL_LEN) - break; - } - - serial[MAX_RAID_SERIAL_LEN - 1] = '\0'; + rsp_buf = (char *) &scsi_serial[4]; + c = rsp_buf; + while (isspace(*c)) + c++; + if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len) + len = rsp_len - (c - rsp_buf); + else + len = MAX_RAID_SERIAL_LEN; + memcpy(serial, c, len); + c = (char *) &serial[len - 1]; + while (isspace(*c) || *c == '\0') + *c-- = '\0'; return 0; } +static int serialcmp(__u8 *s1, __u8 *s2) +{ + return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN); +} + +static void serialcpy(__u8 *dest, __u8 *src) +{ + strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN); +} + static int load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) { @@ -874,7 +986,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) * check if we need to update dl->index */ for (dl = super->disks; dl; dl = dl->next) - if (memcmp(dl->serial, serial, MAX_RAID_SERIAL_LEN) == 0) + if (serialcmp(dl->serial, serial) == 0) break; if (!dl) @@ -897,7 +1009,8 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) dl->next = super->disks; dl->fd = keep_fd ? fd : -1; dl->devname = devname ? strdup(devname) : NULL; - strncpy((char *) dl->serial, (char *) serial, MAX_RAID_SERIAL_LEN); + serialcpy(dl->serial, serial); + dl->index = -2; } else if (keep_fd) { close(dl->fd); dl->fd = fd; @@ -909,8 +1022,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) disk_iter = __get_imsm_disk(super->anchor, i); - if (memcmp(disk_iter->serial, dl->serial, - MAX_RAID_SERIAL_LEN) == 0) { + if (serialcmp(disk_iter->serial, dl->serial) == 0) { __u32 status; dl->disk = *disk_iter; @@ -918,34 +1030,20 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) /* only set index on disks that are a member of a * populated contianer, i.e. one with raid_devs */ - if (status & SPARE_DISK) + if (status & FAILED_DISK) + dl->index = -2; + else if (status & SPARE_DISK) dl->index = -1; else dl->index = i; + break; } } - if (i == super->anchor->num_disks && alloc) { - if (devname) - fprintf(stderr, - Name ": failed to load disk with serial \'%s\' for %s\n", - dl->serial, devname); - free(dl); - return 1; - } - if (i == super->anchor->num_disks && dl->index >= 0) { - if (devname) - fprintf(stderr, - Name ": confused... disk %d with serial \'%s\' " - "is not listed in the current anchor\n", - dl->index, dl->serial); - return 1; - } - if (alloc) super->disks = dl; - + return 0; } @@ -954,31 +1052,74 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src) memcpy(dest, src, sizeof_imsm_dev(src, 0)); } -static void dup_map(struct imsm_dev *dev) +#ifndef MDASSEMBLE +/* When migrating map0 contains the 'destination' state while map1 + * contains the current state. When not migrating map0 contains the + * current state. This routine assumes that map[0].map_state is set to + * the current array state before being called. + * + * Migration is indicated by one of the following states + * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed) + * 2/ Initialize (migr_state=1 migr_type=0 map0state=normal + * map1state=unitialized) + * 3/ Verify (Resync) (migr_state=1 migr_type=1 map0state=normal + * map1state=normal) + * 4/ Rebuild (migr_state=1 migr_type=1 map0state=normal + * map1state=degraded) + */ +static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync) { - struct imsm_map *dest = get_imsm_map(dev, 1); + struct imsm_map *dest; struct imsm_map *src = get_imsm_map(dev, 0); + dev->vol.migr_state = 1; + dev->vol.migr_type = rebuild_resync; + dest = get_imsm_map(dev, 1); + memcpy(dest, src, sizeof_imsm_map(src)); + src->map_state = to_state; } +#endif static int parse_raid_devices(struct intel_super *super) { int i; struct imsm_dev *dev_new; - size_t len; + size_t len, len_migr; + size_t space_needed = 0; + struct imsm_super *mpb = super->anchor; for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i); - len = sizeof_imsm_dev(dev_iter, 1); - dev_new = malloc(len); + len = sizeof_imsm_dev(dev_iter, 0); + len_migr = sizeof_imsm_dev(dev_iter, 1); + if (len_migr > len) + space_needed += len_migr - len; + + dev_new = malloc(len_migr); if (!dev_new) return 1; imsm_copy_dev(dev_new, dev_iter); super->dev_tbl[i] = dev_new; } + /* ensure that super->buf is large enough when all raid devices + * are migrating + */ + if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) { + void *buf; + + len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512); + if (posix_memalign(&buf, 512, len) != 0) + return 1; + + memcpy(buf, super->buf, len); + free(super->buf); + super->buf = buf; + super->len = len; + } + return 0; } @@ -1044,7 +1185,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) } __free_imsm(super, 0); - super->len = __le32_to_cpu(anchor->mpb_size); super->len = ROUND_UP(anchor->mpb_size, 512); if (posix_memalign(&super->buf, 512, super->len) != 0) { if (devname) @@ -1101,6 +1241,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) rc = load_imsm_disk(fd, super, devname, 0); if (rc == 0) rc = parse_raid_devices(super); + return rc; } @@ -1264,12 +1405,13 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, } *sbp = super; + st->container_dev = fd2devnum(fd); if (st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; - st->container_dev = fd2devnum(fd); } + st->loaded_container = 1; return 0; } @@ -1312,6 +1454,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } + st->loaded_container = 0; return 0; } @@ -1434,7 +1577,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { /* initialized in add_to_super */ - map->disk_ord_tbl[i] = __cpu_to_le32(0); + set_imsm_ord_tbl_ent(map, i, 0); } mpb->num_raid_devs++; super->dev_tbl[super->current_vol] = dev; @@ -1486,6 +1629,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, return 1; } +#ifndef MDASSEMBLE static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -1512,7 +1656,7 @@ static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, dl->index = super->anchor->num_disks; super->anchor->num_disks++; } - map->disk_ord_tbl[dk->number] = __cpu_to_le32(dl->index); + set_imsm_ord_tbl_ent(map, dk->number, dl->index); status = CONFIGURED_DISK | USABLE_DISK; dl->disk.status = __cpu_to_le32(status); @@ -1556,7 +1700,6 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->minor = minor(stb.st_rdev); dd->index = -1; dd->devname = devname ? strdup(devname) : NULL; - dd->next = super->disks; dd->fd = fd; rv = imsm_read_serial(fd, devname, dd->serial); if (rv) { @@ -1569,14 +1712,21 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, get_dev_size(fd, NULL, &size); size /= 512; status = USABLE_DISK | SPARE_DISK; - strcpy((char *) dd->disk.serial, (char *) dd->serial); + serialcpy(dd->disk.serial, dd->serial); dd->disk.total_blocks = __cpu_to_le32(size); dd->disk.status = __cpu_to_le32(status); if (sysfs_disk_to_scsi_id(fd, &id) == 0) dd->disk.scsi_id = __cpu_to_le32(id); else dd->disk.scsi_id = __cpu_to_le32(0); - super->disks = dd; + + if (st->update_tail) { + dd->next = super->add; + super->add = dd; + } else { + dd->next = super->disks; + super->disks = dd; + } } static int store_imsm_mpb(int fd, struct intel_super *super); @@ -1598,7 +1748,7 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) mpb->generation_num = __cpu_to_le32(1UL); for (d = super->disks; d; d = d->next) { - if (d->index >= 0) + if (d->index != -1) continue; mpb->disk[0] = d->disk; @@ -1630,7 +1780,6 @@ static int write_super_imsm(struct intel_super *super, int doclose) __u32 generation; __u32 sum; int spares = 0; - int raid_disks = 0; int i; __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); @@ -1640,19 +1789,13 @@ static int write_super_imsm(struct intel_super *super, int doclose) mpb->generation_num = __cpu_to_le32(generation); for (d = super->disks; d; d = d->next) { - if (d->index < 0) + if (d->index == -1) spares++; else { - raid_disks++; mpb->disk[d->index] = d->disk; mpb_size += sizeof(struct imsm_disk); } } - if (raid_disks != mpb->num_disks) { - fprintf(stderr, "%s: expected %d disks only found %d\n", - __func__, mpb->num_disks, raid_disks); - return 1; - } for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = __get_imsm_dev(mpb, i); @@ -1671,11 +1814,9 @@ static int write_super_imsm(struct intel_super *super, int doclose) for (d = super->disks; d ; d = d->next) { if (d->index < 0) continue; - if (store_imsm_mpb(d->fd, super)) { + if (store_imsm_mpb(d->fd, super)) fprintf(stderr, "%s: failed for device %d:%d %s\n", __func__, d->major, d->minor, strerror(errno)); - return 1; - } if (doclose) { close(d->fd); d->fd = -1; @@ -1688,46 +1829,81 @@ static int write_super_imsm(struct intel_super *super, int doclose) return 0; } + +static int create_array(struct supertype *st) +{ + size_t len; + struct imsm_update_create_array *u; + struct intel_super *super = st->sb; + struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + + len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0); + u = malloc(len); + if (!u) { + fprintf(stderr, "%s: failed to allocate update buffer\n", + __func__); + return 1; + } + + u->type = update_create_array; + u->dev_idx = super->current_vol; + imsm_copy_dev(&u->dev, dev); + append_metadata_update(st, u, len); + + return 0; +} + +static int _add_disk(struct supertype *st) +{ + struct intel_super *super = st->sb; + size_t len; + struct imsm_update_add_disk *u; + + if (!super->add) + return 0; + + len = sizeof(*u); + u = malloc(len); + if (!u) { + fprintf(stderr, "%s: failed to allocate update buffer\n", + __func__); + return 1; + } + + u->type = update_add_disk; + append_metadata_update(st, u, len); + + return 0; +} + static int write_init_super_imsm(struct supertype *st) { if (st->update_tail) { - /* queue the recently created array as a metadata update */ - size_t len; - struct imsm_update_create_array *u; + /* queue the recently created array / added disk + * as a metadata update */ struct intel_super *super = st->sb; - struct imsm_dev *dev; struct dl *d; + int rv; - if (super->current_vol < 0 || - !(dev = get_imsm_dev(super, super->current_vol))) { - fprintf(stderr, "%s: could not determine sub-array\n", - __func__); - return 1; - } - - - len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0); - u = malloc(len); - if (!u) { - fprintf(stderr, "%s: failed to allocate update buffer\n", - __func__); - return 1; - } - - u->type = update_create_array; - u->dev_idx = super->current_vol; - imsm_copy_dev(&u->dev, dev); - append_metadata_update(st, u, len); + /* determine if we are creating a volume or adding a disk */ + if (super->current_vol < 0) { + /* in the add disk case we are running in mdmon + * context, so don't close fd's + */ + return _add_disk(st); + } else + rv = create_array(st); for (d = super->disks; d ; d = d->next) { close(d->fd); d->fd = -1; } - return 0; + return rv; } else return write_super_imsm(st->sb, 1); } +#endif static int store_zero_imsm(struct supertype *st, int fd) { @@ -1749,6 +1925,12 @@ static int store_zero_imsm(struct supertype *st, int fd) return 0; } +static int imsm_bbm_log_size(struct imsm_super *mpb) +{ + return __le32_to_cpu(mpb->bbm_log_size); +} + +#ifndef MDASSEMBLE static int validate_geometry_imsm_container(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, @@ -1890,11 +2072,6 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 1; } -int imsm_bbm_log_size(struct imsm_super *mpb) -{ - return __le32_to_cpu(mpb->bbm_log_size); -} - static int validate_geometry_imsm(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, unsigned long long *freesize, @@ -1982,6 +2159,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, return 1; } +#endif /* MDASSEMBLE */ static struct mdinfo *container_content_imsm(struct supertype *st) { @@ -2007,7 +2185,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_vol *vol = &dev->vol; struct imsm_map *map = get_imsm_map(dev, 0); struct mdinfo *this; int slot; @@ -2016,32 +2193,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st) memset(this, 0, sizeof(*this)); this->next = rest; - this->array.level = get_imsm_raid_level(map); - this->array.raid_disks = map->num_members; - this->array.layout = imsm_level_to_layout(this->array.level); - this->array.md_minor = -1; - this->array.ctime = 0; - this->array.utime = 0; - this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; - this->array.state = !vol->dirty; - this->container_member = i; - if (map->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty || dev->vol.migr_state) - this->resync_start = 0; - else - this->resync_start = ~0ULL; - - strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); - this->name[MAX_RAID_SERIAL_LEN] = 0; - - sprintf(this->text_version, "/%s/%d", - devnum2devname(st->container_dev), - this->container_member); - - memset(this->uuid, 0, sizeof(this->uuid)); - - this->component_size = __le32_to_cpu(map->blocks_per_member); - + super->current_vol = i; + getinfo_super_imsm_volume(st, this); for (slot = 0 ; slot < map->num_members; slot++) { struct mdinfo *info_d; struct dl *d; @@ -2051,7 +2204,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) __u32 ord; skip = 0; - idx = get_imsm_disk_idx(map, slot); + idx = get_imsm_disk_idx(dev, slot); ord = get_imsm_ord_tbl_ent(dev, slot); for (d = super->disks; d ; d = d->next) if (d->index == idx) @@ -2111,6 +2264,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) } +#ifndef MDASSEMBLE static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { @@ -2128,9 +2282,8 @@ static int imsm_open_new(struct supertype *c, struct active_array *a, return 0; } -static __u8 imsm_check_degraded(struct intel_super *super, int n, int failed) +static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed) { - struct imsm_dev *dev = get_imsm_dev(super, n); struct imsm_map *map = get_imsm_map(dev, 0); if (!failed) @@ -2156,22 +2309,24 @@ static __u8 imsm_check_degraded(struct intel_super *super, int n, int failed) int device_per_mirror = 2; /* FIXME is this always the case? * and are they always adjacent? */ - int failed = 0; + int r10fail = 0; int i; for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(map, i); + int idx = get_imsm_disk_idx(dev, i); struct imsm_disk *disk = get_imsm_disk(super, idx); - if (__le32_to_cpu(disk->status) & FAILED_DISK) - failed++; + if (!disk) + r10fail++; + else if (__le32_to_cpu(disk->status) & FAILED_DISK) + r10fail++; - if (failed >= device_per_mirror) + if (r10fail >= device_per_mirror) return IMSM_T_STATE_FAILED; - /* reset 'failed' for next mirror set */ + /* reset 'r10fail' for next mirror set */ if (!((i + 1) % device_per_mirror)) - failed = 0; + r10fail = 0; } return IMSM_T_STATE_DEGRADED; @@ -2189,75 +2344,110 @@ static __u8 imsm_check_degraded(struct intel_super *super, int n, int failed) return map->map_state; } -static int imsm_count_failed(struct intel_super *super, struct imsm_map *map) +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) { int i; int failed = 0; struct imsm_disk *disk; + struct imsm_map *map = get_imsm_map(dev, 0); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(map, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i); + int idx = ord_to_idx(ord); disk = get_imsm_disk(super, idx); - if (__le32_to_cpu(disk->status) & FAILED_DISK) - failed++; - else if (!(__le32_to_cpu(disk->status) & USABLE_DISK)) + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK || + ord & IMSM_ORD_REBUILD) failed++; } return failed; } +static int is_resyncing(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 1; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_NORMAL) + return 1; + else + return 0; +} + +static int is_rebuilding(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 0; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_DEGRADED) + return 1; + else + return 0; +} + +/* Handle dirty -> clean transititions and resync. Degraded and rebuild + * states are handled in imsm_set_disk() with one exception, when a + * resync is stopped due to a new failure this routine will set the + * 'degraded' state for the array. + */ static int imsm_set_array_state(struct active_array *a, int consistent) { int inst = a->info.container_member; struct intel_super *super = a->container->sb; struct imsm_dev *dev = get_imsm_dev(super, inst); struct imsm_map *map = get_imsm_map(dev, 0); - int dirty = !consistent; - int failed; - __u8 map_state; + int failed = imsm_count_failed(super, dev); + __u8 map_state = imsm_check_degraded(super, dev, failed); - failed = imsm_count_failed(super, map); - map_state = imsm_check_degraded(super, inst, failed); - - if (consistent && !dev->vol.dirty && - (dev->vol.migr_state || map_state != IMSM_T_STATE_NORMAL)) - a->resync_start = 0ULL; - if (consistent == 2 && a->resync_start != ~0ULL) + if (consistent == 2 && + (a->resync_start != ~0ULL || + map_state != IMSM_T_STATE_NORMAL || + dev->vol.migr_state)) consistent = 0; if (a->resync_start == ~0ULL) { - /* complete recovery or initial resync */ - if (map->map_state != map_state) { - dprintf("imsm: map_state %d: %d\n", - inst, map_state); - map->map_state = map_state; - super->updates_pending++; - } - if (dev->vol.migr_state) { - dprintf("imsm: mark resync complete\n"); + /* complete intialization / resync, + * recovery is completed in ->set_disk + */ + if (is_resyncing(dev)) { + dprintf("imsm: mark resync done\n"); dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + map->map_state = map_state; super->updates_pending++; } - } else if (!dev->vol.migr_state) { - dprintf("imsm: mark '%s' (%llu)\n", - failed ? "rebuild" : "initializing", a->resync_start); - /* mark that we are rebuilding */ - map->map_state = failed ? map_state : IMSM_T_STATE_NORMAL; - dev->vol.migr_state = 1; - dev->vol.migr_type = failed ? 1 : 0; - dup_map(dev); - a->check_degraded = 1; + } else if (!is_resyncing(dev) && !failed) { + /* mark the start of the init process if nothing is failed */ + dprintf("imsm: mark resync start (%llu)\n", a->resync_start); + map->map_state = map_state; + migrate(dev, IMSM_T_STATE_NORMAL, + map->map_state == IMSM_T_STATE_NORMAL); super->updates_pending++; } /* mark dirty / clean */ - if (dirty != dev->vol.dirty) { + if (dev->vol.dirty != !consistent) { dprintf("imsm: mark '%s' (%llu)\n", - dirty ? "dirty" : "clean", a->resync_start); - dev->vol.dirty = dirty; + consistent ? "clean" : "dirty", a->resync_start); + if (consistent) + dev->vol.dirty = 0; + else + dev->vol.dirty = 1; super->updates_pending++; } return consistent; @@ -2270,9 +2460,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state) struct imsm_dev *dev = get_imsm_dev(super, inst); struct imsm_map *map = get_imsm_map(dev, 0); struct imsm_disk *disk; + int failed; __u32 status; - int failed = 0; - int new_failure = 0; + __u32 ord; + __u8 map_state; if (n > map->num_members) fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n", @@ -2283,46 +2474,46 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - disk = get_imsm_disk(super, get_imsm_disk_idx(map, n)); + ord = get_imsm_ord_tbl_ent(dev, n); + disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ status = __le32_to_cpu(disk->status); if ((state & DS_FAULTY) && !(status & FAILED_DISK)) { status |= FAILED_DISK; disk->status = __cpu_to_le32(status); - new_failure = 1; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); super->updates_pending++; } /* check if in_sync */ - if ((state & DS_INSYNC) && !(status & USABLE_DISK)) { - status |= USABLE_DISK; - disk->status = __cpu_to_le32(status); + if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) { + struct imsm_map *migr_map = get_imsm_map(dev, 1); + + set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); super->updates_pending++; } - /* the number of failures have changed, count up 'failed' to determine - * degraded / failed status - */ - if (new_failure && map->map_state != IMSM_T_STATE_FAILED) - failed = imsm_count_failed(super, map); - - /* determine map_state based on failed or in_sync count */ - if (failed) - map->map_state = imsm_check_degraded(super, inst, failed); - else if (map->map_state == IMSM_T_STATE_DEGRADED) { - struct mdinfo *d; - int working = 0; - - for (d = a->info.devs ; d ; d = d->next) - if (d->curr_state & DS_INSYNC) - working++; + failed = imsm_count_failed(super, dev); + map_state = imsm_check_degraded(super, dev, failed); - if (working == a->info.array.raid_disks) { - map->map_state = IMSM_T_STATE_NORMAL; - dev->vol.migr_state = 0; - dev->vol.migr_type = 0; - super->updates_pending++; - } + /* check if recovery complete, newly degraded, or failed */ + if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { + map->map_state = map_state; + dev->vol.migr_state = 0; + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_DEGRADED && + map->map_state != map_state && + !dev->vol.migr_state) { + dprintf("imsm: mark degraded\n"); + map->map_state = map_state; + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_FAILED && + map->map_state != map_state) { + dprintf("imsm: mark failed\n"); + dev->vol.migr_state = 0; + map->map_state = map_state; + super->updates_pending++; } } @@ -2372,15 +2563,14 @@ static void imsm_sync_metadata(struct supertype *container) static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - struct imsm_map *map = get_imsm_map(dev, 0); - int i = get_imsm_disk_idx(map, idx); + int i = get_imsm_disk_idx(dev, idx); struct dl *dl; for (dl = super->disks; dl; dl = dl->next) if (dl->index == i) break; - if (__le32_to_cpu(dl->disk.status) & FAILED_DISK) + if (dl && __le32_to_cpu(dl->disk.status) & FAILED_DISK) dl = NULL; if (dl) @@ -2389,9 +2579,10 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a return dl; } -static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct active_array *a) +static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); + int idx = get_imsm_disk_idx(dev, slot); struct imsm_map *map = get_imsm_map(dev, 0); unsigned long long esize; unsigned long long pos; @@ -2406,7 +2597,8 @@ static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct acti for (dl = super->disks; dl; dl = dl->next) { /* If in this array, skip */ for (d = a->info.devs ; d ; d = d->next) - if (d->disk.major == dl->major && + if (d->state_fd >= 0 && + d->disk.major == dl->major && d->disk.minor == dl->minor) { dprintf("%x:%x already in array\n", dl->major, dl->minor); break; @@ -2414,13 +2606,13 @@ static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct acti if (d) continue; - /* skip marked in use or failed drives */ + /* skip in use or failed drives */ status = __le32_to_cpu(dl->disk.status); - if (status & FAILED_DISK || status & CONFIGURED_DISK) { + if (status & FAILED_DISK || idx == dl->index) { dprintf("%x:%x status ( %s%s)\n", dl->major, dl->minor, status & FAILED_DISK ? "failed " : "", - status & CONFIGURED_DISK ? "configured " : ""); + idx == dl->index ? "in use " : ""); continue; } @@ -2506,7 +2698,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); - if (imsm_check_degraded(super, inst, failed) != IMSM_T_STATE_DEGRADED) + if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) return NULL; /* For each slot, if it is not working, find a spare */ @@ -2590,23 +2782,25 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, return rv; } -static int disks_overlap(struct imsm_map *m1, struct imsm_map *m2) +static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2) { + struct imsm_map *m1 = get_imsm_map(d1, 0); + struct imsm_map *m2 = get_imsm_map(d2, 0); int i; int j; int idx; for (i = 0; i < m1->num_members; i++) { - idx = get_imsm_disk_idx(m1, i); + idx = get_imsm_disk_idx(d1, i); for (j = 0; j < m2->num_members; j++) - if (idx == get_imsm_disk_idx(m2, j)) + if (idx == get_imsm_disk_idx(d2, j)) return 1; } return 0; } -static void imsm_delete(struct intel_super *super, struct dl **dlp); +static void imsm_delete(struct intel_super *super, struct dl **dlp, int index); static void imsm_process_update(struct supertype *st, struct metadata_update *update) @@ -2620,20 +2814,41 @@ static void imsm_process_update(struct supertype *st, * flag */ struct intel_super *super = st->sb; - struct imsm_super *mpb = super->anchor; + struct imsm_super *mpb; enum imsm_update_type type = *(enum imsm_update_type *) update->buf; + /* update requires a larger buf but the allocation failed */ + if (super->next_len && !super->next_buf) { + super->next_len = 0; + return; + } + + if (super->next_buf) { + memcpy(super->next_buf, super->buf, super->len); + free(super->buf); + super->len = super->next_len; + super->buf = super->next_buf; + + super->next_len = 0; + super->next_buf = NULL; + } + + mpb = super->anchor; + switch (type) { case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; struct imsm_dev *dev = get_imsm_dev(super, u->array); struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *migr_map; struct active_array *a; struct imsm_disk *disk; __u32 status; + __u8 to_state; struct dl *dl; unsigned int found; - int victim; + int failed; + int victim = get_imsm_disk_idx(dev, u->slot); int i; for (dl = super->disks; dl; dl = dl->next) @@ -2642,52 +2857,67 @@ static void imsm_process_update(struct supertype *st, if (!dl) { fprintf(stderr, "error: imsm_activate_spare passed " - "an unknown disk (index: %d serial: %s)\n", - u->dl->index, u->dl->serial); + "an unknown disk (index: %d)\n", + u->dl->index); return; } super->updates_pending++; + /* count failures (excluding rebuilds and the victim) + * to determine map[0] state + */ + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK) + failed++; + } + /* adding a pristine spare, assign a new index */ if (dl->index < 0) { dl->index = super->anchor->num_disks; super->anchor->num_disks++; } - victim = get_imsm_disk_idx(map, u->slot); - map->disk_ord_tbl[u->slot] = __cpu_to_le32(dl->index); disk = &dl->disk; status = __le32_to_cpu(disk->status); status |= CONFIGURED_DISK; - status &= ~(SPARE_DISK | USABLE_DISK); + status &= ~SPARE_DISK; disk->status = __cpu_to_le32(status); + /* mark rebuild */ + to_state = imsm_check_degraded(super, dev, failed); + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, to_state, 1); + migr_map = get_imsm_map(dev, 1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); + /* count arrays using the victim in the metadata */ found = 0; for (a = st->arrays; a ; a = a->next) { dev = get_imsm_dev(super, a->info.container_member); - map = get_imsm_map(dev, 0); for (i = 0; i < map->num_members; i++) - if (victim == get_imsm_disk_idx(map, i)) + if (victim == get_imsm_disk_idx(dev, i)) found++; } - /* clear some flags if the victim is no longer being + /* delete the victim if it is no longer being * utilized anywhere */ if (!found) { struct dl **dlp; - for (dlp = &super->disks; *dlp; ) + + for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) if ((*dlp)->index == victim) break; - disk = &(*dlp)->disk; - status = __le32_to_cpu(disk->status); - status &= ~(CONFIGURED_DISK | USABLE_DISK); - disk->status = __cpu_to_le32(status); /* We know that 'manager' isn't touching anything, * so it is safe to: */ - imsm_delete(super, dlp); + imsm_delete(super, dlp, victim); } break; } @@ -2739,7 +2969,7 @@ static void imsm_process_update(struct supertype *st, if ((new_start >= start && new_start <= end) || (start >= new_start && start <= new_end)) overlap = 1; - if (overlap && disks_overlap(map, new_map)) { + if (overlap && disks_overlap(dev, &u->dev)) { dprintf("%s: arrays overlap\n", __func__); return; } @@ -2758,27 +2988,48 @@ static void imsm_process_update(struct supertype *st, super->updates_pending++; dev = update->space; + map = get_imsm_map(dev, 0); update->space = NULL; imsm_copy_dev(dev, &u->dev); + map = get_imsm_map(dev, 0); super->dev_tbl[u->dev_idx] = dev; mpb->num_raid_devs++; - /* fix up flags, if arrays overlap then the drives can not be - * spares - */ + /* fix up flags */ for (i = 0; i < map->num_members; i++) { struct imsm_disk *disk; __u32 status; - disk = get_imsm_disk(super, get_imsm_disk_idx(map, i)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); status = __le32_to_cpu(disk->status); status |= CONFIGURED_DISK; - if (overlap) - status &= ~SPARE_DISK; + status &= ~SPARE_DISK; disk->status = __cpu_to_le32(status); } break; } + case update_add_disk: + + /* we may be able to repair some arrays if disks are + * being added */ + if (super->add) { + struct active_array *a; + for (a = st->arrays; a; a = a->next) + a->check_degraded = 1; + } + /* add some spares to the metadata */ + while (super->add) { + struct dl *al; + + al = super->add; + super->add = al->next; + al->next = super->disks; + super->disks = al; + dprintf("%s: added %x:%x\n", + __func__, al->major, al->minor); + } + + break; } } @@ -2786,25 +3037,23 @@ static void imsm_prepare_update(struct supertype *st, struct metadata_update *update) { /** - * Allocate space to hold new disk entries, raid-device entries or a - * new mpb if necessary. We currently maintain an mpb large enough to - * hold 2 subarrays for the given number of disks. This may not be - * sufficient when reshaping. - * - * FIX ME handle the reshape case. - * - * The monitor will be able to safely change super->mpb by arranging - * for it to be freed in check_update_queue(). I.e. the monitor thread - * will start using the new pointer and the manager can continue to use - * the old value until check_update_queue() runs. + * Allocate space to hold new disk entries, raid-device entries or a new + * mpb if necessary. The manager synchronously waits for updates to + * complete in the monitor, so new mpb buffers allocated here can be + * integrated by the monitor thread without worrying about live pointers + * in the manager thread. */ enum imsm_update_type type = *(enum imsm_update_type *) update->buf; + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + size_t buf_len; + size_t len = 0; switch (type) { case update_create_array: { struct imsm_update_create_array *u = (void *) update->buf; - size_t len = sizeof_imsm_dev(&u->dev, 1); + len = sizeof_imsm_dev(&u->dev, 1); update->space = malloc(len); break; default: @@ -2812,44 +3061,76 @@ static void imsm_prepare_update(struct supertype *st, } } - return; + /* check if we need a larger metadata buffer */ + if (super->next_buf) + buf_len = super->next_len; + else + buf_len = super->len; + + if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) { + /* ok we need a larger buf than what is currently allocated + * if this allocation fails process_update will notice that + * ->next_len is set and ->next_buf is NULL + */ + buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512); + if (super->next_buf) + free(super->next_buf); + + super->next_len = buf_len; + if (posix_memalign(&super->next_buf, buf_len, 512) != 0) + super->next_buf = NULL; + } } /* must be called while manager is quiesced */ -static void imsm_delete(struct intel_super *super, struct dl **dlp) +static void imsm_delete(struct intel_super *super, struct dl **dlp, int index) { struct imsm_super *mpb = super->anchor; - struct dl *dl = *dlp; struct dl *iter; struct imsm_dev *dev; struct imsm_map *map; - int i, j; + int i, j, num_members; + __u32 ord; - dprintf("%s: deleting device %x:%x from imsm_super\n", - __func__, dl->major, dl->minor); + dprintf("%s: deleting device[%d] from imsm_super\n", + __func__, index); /* shift all indexes down one */ for (iter = super->disks; iter; iter = iter->next) - if (iter->index > dl->index) + if (iter->index > index) iter->index--; for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); map = get_imsm_map(dev, 0); + num_members = map->num_members; + for (j = 0; j < num_members; j++) { + /* update ord entries being careful not to propagate + * ord-flags to the first map + */ + ord = get_imsm_ord_tbl_ent(dev, j); - for (j = 0; j < map->num_members; j++) { - int idx = get_imsm_disk_idx(map, j); + if (ord_to_idx(ord) <= index) + continue; - if (idx > dl->index) - map->disk_ord_tbl[j] = __cpu_to_le32(idx - 1); + map = get_imsm_map(dev, 0); + set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1)); + map = get_imsm_map(dev, 1); + if (map) + set_imsm_ord_tbl_ent(map, j, ord - 1); } } mpb->num_disks--; super->updates_pending++; - *dlp = (*dlp)->next; - __free_imsm_disk(dl); + if (*dlp) { + struct dl *dl = *dlp; + + *dlp = (*dlp)->next; + __free_imsm_disk(dl); + } } +#endif /* MDASSEMBLE */ struct superswitch super_imsm = { #ifndef MDASSEMBLE @@ -2858,6 +3139,8 @@ struct superswitch super_imsm = { .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm, .write_init_super = write_init_super_imsm, + .validate_geometry = validate_geometry_imsm, + .add_to_super = add_to_super_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -2870,15 +3153,14 @@ struct superswitch super_imsm = { .load_super = load_super_imsm, .init_super = init_super_imsm, - .add_to_super = add_to_super_imsm, .store_super = store_zero_imsm, .free_super = free_super_imsm, .match_metadata_desc = match_metadata_desc_imsm, .container_content = container_content_imsm, - .validate_geometry = validate_geometry_imsm, .external = 1, +#ifndef MDASSEMBLE /* for mdmon */ .open_new = imsm_open_new, .load_super = load_super_imsm, @@ -2888,4 +3170,5 @@ struct superswitch super_imsm = { .activate_spare = imsm_activate_spare, .process_update = imsm_process_update, .prepare_update = imsm_prepare_update, +#endif /* MDASSEMBLE */ };