X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=super-intel.c;h=f5ce06bdacb055f5373369d076e800eb8e7b983b;hp=0ebf87faa8190588a9bfe85c668bf4a6c9fb5255;hb=40ebbb9cfee33d550ab683846a9d38d2995a7059;hpb=24565c9a99e291edb773e1d2dc9478949792ca39 diff --git a/super-intel.c b/super-intel.c index 0ebf87fa..f5ce06bd 100644 --- a/super-intel.c +++ b/super-intel.c @@ -1,7 +1,7 @@ /* * mdadm - Intel(R) Matrix Storage Manager Support * - * Copyright (C) 2002-2007 Intel Corporation + * Copyright (C) 2002-2008 Intel Corporation * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,8 +17,10 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define HAVE_STDINT_H 1 #include "mdadm.h" #include "mdmon.h" +#include "sha1.h" #include #include #include @@ -75,7 +77,8 @@ struct imsm_map { } __attribute__ ((packed)); struct imsm_vol { - __u32 reserved[2]; + __u32 curr_migr_unit; + __u32 reserved; __u8 migr_state; /* Normal or Migrating */ __u8 migr_type; /* Initializing, Rebuilding, ... */ __u8 dirty; @@ -177,6 +180,7 @@ struct intel_super { int fd; } *disks; struct dl *add; /* list of disks to add while mdmon active */ + struct dl *missing; /* disks removed while we weren't looking */ struct bbm_log *bbm_log; }; @@ -209,17 +213,6 @@ struct imsm_update_add_disk { enum imsm_update_type type; }; -static int imsm_env_devname_as_serial(void) -{ - char *val = getenv("IMSM_DEVNAME_AS_SERIAL"); - - if (val && atoi(val) == 1) - return 1; - - return 0; -} - - static struct supertype *match_metadata_desc_imsm(char *arg) { struct supertype *st; @@ -238,10 +231,12 @@ static struct supertype *match_metadata_desc_imsm(char *arg) return st; } +#ifndef MDASSEMBLE static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } +#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -253,6 +248,7 @@ static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) return &mpb->disk[index]; } +#ifndef MDASSEMBLE /* retrieve a disk from the parsed metadata */ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) { @@ -264,6 +260,7 @@ static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) return NULL; } +#endif /* generate a checksum directly from the anchor when the anchor is known to be * up-to-date, currently only at load or write_super after coalescing @@ -400,6 +397,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) struct extent *rv, *e; int i, j; int memberships = 0; + __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); @@ -433,13 +431,59 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) } qsort(rv, memberships, sizeof(*rv), cmp_extent); - e->start = __le32_to_cpu(dl->disk.total_blocks) - - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); + /* determine the start of the metadata + * when no raid devices are defined use the default + * ...otherwise allow the metadata to truncate the value + * as is the case with older versions of imsm + */ + if (memberships) { + struct extent *last = &rv[memberships - 1]; + __u32 remainder; + + remainder = __le32_to_cpu(dl->disk.total_blocks) - + (last->start + last->size); + if (reservation > remainder) + reservation = remainder; + } + e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation; e->size = 0; return rv; } +/* try to determine how much space is reserved for metadata from + * the last get_extents() entry, otherwise fallback to the + * default + */ +static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl) +{ + struct extent *e; + int i; + __u32 rv; + + /* for spares just return a minimal reservation which will grow + * once the spare is picked up by an array + */ + if (dl->index == -1) + return MPB_SECTOR_CNT; + + e = get_extents(super, dl); + if (!e) + return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + + /* scroll to last entry */ + for (i = 0; e[i].size; i++) + continue; + + rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start; + + free(e); + + return rv; +} + #ifndef MDASSEMBLE +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info); + static void print_imsm_dev(struct imsm_dev *dev, int index) { __u64 sz; @@ -448,7 +492,7 @@ static void print_imsm_dev(struct imsm_dev *dev, int index) __u32 ord; printf("\n"); - printf("[%s]:\n", dev->volume); + printf("[%.16s]:\n", dev->volume); printf(" RAID Level : %d\n", get_imsm_raid_level(map)); printf(" Members : %d\n", map->num_members); for (slot = 0; slot < map->num_members; slot++) @@ -488,10 +532,10 @@ static void print_imsm_dev(struct imsm_dev *dev, int index) printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); } -static void print_imsm_disk(struct imsm_super *mpb, int index) +static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) { struct imsm_disk *disk = __get_imsm_disk(mpb, index); - char str[MAX_RAID_SERIAL_LEN]; + char str[MAX_RAID_SERIAL_LEN + 1]; __u32 s; __u64 sz; @@ -499,7 +543,7 @@ static void print_imsm_disk(struct imsm_super *mpb, int index) return; printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN, "%s", disk->serial); + snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); printf(" Disk%02d Serial : %s\n", index, str); s = __le32_to_cpu(disk->status); printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "", @@ -507,8 +551,7 @@ static void print_imsm_disk(struct imsm_super *mpb, int index) s&FAILED_DISK ? " failed" : "", s&USABLE_DISK ? " usable" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS * mpb->num_raid_devs); + sz = __le32_to_cpu(disk->total_blocks) - reserved; printf(" Usable Size : %llu%s\n", (unsigned long long)sz, human_size(sz * 512)); } @@ -519,7 +562,11 @@ static void examine_super_imsm(struct supertype *st, char *homehost) struct imsm_super *mpb = super->anchor; char str[MAX_SIGNATURE_LENGTH]; int i; + struct mdinfo info; + char nbuf[64]; __u32 sum; + __u32 reserved = imsm_reserved_sectors(super, super->disks); + snprintf(str, MPB_SIG_LEN, "%s", mpb->sig); printf(" Magic : %s\n", str); @@ -527,13 +574,16 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Version : %s\n", get_imsm_version(mpb)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf(" UUID : %s\n", nbuf + 5); sum = __le32_to_cpu(mpb->check_sum); printf(" Checksum : %08x %s\n", sum, __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect"); printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(mpb, super->disks->index); + print_imsm_disk(mpb, super->disks->index, reserved); if (super->bbm_log) { struct bbm_log *log = super->bbm_log; @@ -550,13 +600,35 @@ static void examine_super_imsm(struct supertype *st, char *homehost) for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(mpb, i); + print_imsm_disk(mpb, i, reserved); } } +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info); + static void brief_examine_super_imsm(struct supertype *st) { - printf("ARRAY /dev/imsm metadata=imsm\n"); + /* We just write a generic IMSM ARRAY entry */ + struct mdinfo info; + char nbuf[64]; + struct intel_super *super = st->sb; + int i; + + if (!super->anchor->num_raid_devs) + return; + + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf("ARRAY /dev/imsm metadata=imsm auto=md UUID=%s\n", nbuf + 5); + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + + super->current_vol = i; + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf("ARRAY /dev/md/%.16s container=/dev/imsm member=%d auto=mdp UUID=%s\n", + dev->volume, i, nbuf + 5); + } } static void detail_super_imsm(struct supertype *st, char *homehost) @@ -566,7 +638,11 @@ static void detail_super_imsm(struct supertype *st, char *homehost) static void brief_detail_super_imsm(struct supertype *st) { - printf("%s\n", __FUNCTION__); + struct mdinfo info; + char nbuf[64]; + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf(" UUID=%s", nbuf + 5); } #endif @@ -574,18 +650,51 @@ static int match_home_imsm(struct supertype *st, char *homehost) { printf("%s\n", __FUNCTION__); - return 0; + return -1; } static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) { - /* imsm does not track uuid's so just make sure we never return - * the same value twice to break uuid matching in Manage_subdevs - * FIXME what about the use of uuid's with bitmap's? + /* The uuid returned here is used for: + * uuid to put into bitmap file (Create, Grow) + * uuid for backup header when saving critical section (Grow) + * comparing uuids when re-adding a device into an array + * In these cases the uuid required is that of the data-array, + * not the device-set. + * uuid to recognise same set when adding a missing device back + * to an array. This is a uuid for the device-set. + * + * For each of these we can make do with a truncated + * or hashed uuid rather than the original, as long as + * everyone agrees. + * In each case the uuid required is that of the data-array, + * not the device-set. */ - static int dummy_id = 0; + /* imsm does not track uuid's so we synthesis one using sha1 on + * - The signature (Which is constant for all imsm array, but no matter) + * - the family_num of the container + * - the index number of the volume + * - the 'serial' number of the volume. + * Hopefully these are all constant. + */ + struct intel_super *super = st->sb; - uuid[0] = dummy_id++; + char buf[20]; + struct sha1_ctx ctx; + struct imsm_dev *dev = NULL; + + sha1_init_ctx(&ctx); + sha1_process_bytes(super->anchor->sig, MAX_SIGNATURE_LENGTH, &ctx); + sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx); + if (super->current_vol >= 0) + dev = get_imsm_dev(super, super->current_vol); + if (dev) { + __u32 vol = super->current_vol; + sha1_process_bytes(&vol, sizeof(vol), &ctx); + sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx); + } + sha1_finish_ctx(&ctx, buf); + memcpy(uuid, buf, 4*4); } #if 0 @@ -626,7 +735,7 @@ static int imsm_level_to_layout(int level) case 6: return ALGORITHM_LEFT_ASYMMETRIC; case 10: - return 0x102; //FIXME is this correct? + return 0x102; } return -1; } @@ -644,17 +753,33 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) info->array.md_minor = -1; info->array.ctime = 0; info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512); + info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; + info->array.state = !dev->vol.dirty; + + info->disk.major = 0; + info->disk.minor = 0; info->data_offset = __le32_to_cpu(map->pba_of_lba0); info->component_size = __le32_to_cpu(map->blocks_per_member); + memset(info->uuid, 0, sizeof(info->uuid)); - info->disk.major = 0; - info->disk.minor = 0; + if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) + info->resync_start = 0; + else if (dev->vol.migr_state) + info->resync_start = __le32_to_cpu(dev->vol.curr_migr_unit); + else + info->resync_start = ~0ULL; + strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); + info->name[MAX_RAID_SERIAL_LEN] = 0; + + info->array.major_version = -1; + info->array.minor_version = -2; sprintf(info->text_version, "/%s/%d", devnum2devname(st->container_dev), info->container_member); + info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */ + uuid_from_super_imsm(st, info->uuid); } @@ -684,22 +809,33 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->disk.minor = 0; info->disk.raid_disk = -1; info->reshape_active = 0; + info->array.major_version = -1; + info->array.minor_version = -2; strcpy(info->text_version, "imsm"); + info->safe_mode_delay = 0; info->disk.number = -1; info->disk.state = 0; + info->name[0] = 0; if (super->disks) { + __u32 reserved = imsm_reserved_sectors(super, super->disks); + disk = &super->disks->disk; - info->disk.number = super->disks->index; - info->disk.raid_disk = super->disks->index; - info->data_offset = __le32_to_cpu(disk->total_blocks) - - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); - info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved; + info->component_size = reserved; s = __le32_to_cpu(disk->status); info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; - info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0; + info->disk.state |= s & SPARE_DISK ? 0 : (1 << MD_DISK_SYNC); } + + /* only call uuid_from_super_imsm when this disk is part of a populated container, + * ->compare_super may have updated the 'num_raid_devs' field for spares + */ + if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs) + uuid_from_super_imsm(st, info->uuid); + else + memcpy(info->uuid, uuid_match_any, sizeof(int[4])); } static int update_super_imsm(struct supertype *st, struct mdinfo *info, @@ -805,6 +941,24 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst) */ if (first->anchor->num_raid_devs == 0 && sec->anchor->num_raid_devs > 0) { + int i; + + /* we need to copy raid device info from sec if an allocation + * fails here we don't associate the spare + */ + for (i = 0; i < sec->anchor->num_raid_devs; i++) { + first->dev_tbl[i] = malloc(sizeof(struct imsm_dev)); + if (!first->dev_tbl) { + while (--i >= 0) { + free(first->dev_tbl[i]); + first->dev_tbl[i] = NULL; + } + fprintf(stderr, "imsm: failed to associate spare\n"); + return 3; + } + *first->dev_tbl[i] = *sec->dev_tbl[i]; + } + first->anchor->num_raid_devs = sec->anchor->num_raid_devs; first->anchor->family_num = sec->anchor->family_num; } @@ -845,20 +999,19 @@ static int imsm_read_serial(int fd, char *devname, unsigned char scsi_serial[255]; int rv; int rsp_len; - int i, cnt; + int len; + char *c, *rsp_buf; memset(scsi_serial, 0, sizeof(scsi_serial)); - if (imsm_env_devname_as_serial()) { - char name[MAX_RAID_SERIAL_LEN]; - - fd2devname(fd, name); - strcpy((char *) serial, name); + rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); + + if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) { + memset(serial, 0, MAX_RAID_SERIAL_LEN); + fd2devname(fd, (char *) serial); return 0; } - rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); - if (rv != 0) { if (devname) fprintf(stderr, @@ -867,19 +1020,41 @@ static int imsm_read_serial(int fd, char *devname, return rv; } + /* trim leading whitespace */ rsp_len = scsi_serial[3]; - for (i = 0, cnt = 0; i < rsp_len; i++) { - if (!isspace(scsi_serial[4 + i])) - serial[cnt++] = scsi_serial[4 + i]; - if (cnt == MAX_RAID_SERIAL_LEN) - break; - } + rsp_buf = (char *) &scsi_serial[4]; + c = rsp_buf; + while (isspace(*c)) + c++; + + /* truncate len to the end of rsp_buf if necessary */ + if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len) + len = rsp_len - (c - rsp_buf); + else + len = MAX_RAID_SERIAL_LEN; + + /* initialize the buffer and copy rsp_buf characters */ + memset(serial, 0, MAX_RAID_SERIAL_LEN); + memcpy(serial, c, len); - serial[MAX_RAID_SERIAL_LEN - 1] = '\0'; + /* trim trailing whitespace starting with the last character copied */ + c = (char *) &serial[len - 1]; + while (isspace(*c) || *c == '\0') + *c-- = '\0'; return 0; } +static int serialcmp(__u8 *s1, __u8 *s2) +{ + return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN); +} + +static void serialcpy(__u8 *dest, __u8 *src) +{ + strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN); +} + static int load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) { @@ -900,7 +1075,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) * check if we need to update dl->index */ for (dl = super->disks; dl; dl = dl->next) - if (memcmp(dl->serial, serial, MAX_RAID_SERIAL_LEN) == 0) + if (serialcmp(dl->serial, serial) == 0) break; if (!dl) @@ -923,7 +1098,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) dl->next = super->disks; dl->fd = keep_fd ? fd : -1; dl->devname = devname ? strdup(devname) : NULL; - strncpy((char *) dl->serial, (char *) serial, MAX_RAID_SERIAL_LEN); + serialcpy(dl->serial, serial); dl->index = -2; } else if (keep_fd) { close(dl->fd); @@ -936,8 +1111,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) disk_iter = __get_imsm_disk(super->anchor, i); - if (memcmp(disk_iter->serial, dl->serial, - MAX_RAID_SERIAL_LEN) == 0) { + if (serialcmp(disk_iter->serial, dl->serial) == 0) { __u32 status; dl->disk = *disk_iter; @@ -956,6 +1130,13 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) } } + /* no match, maybe a stale failed drive */ + if (i == super->anchor->num_disks && dl->index >= 0) { + dl->disk = *__get_imsm_disk(super->anchor, dl->index); + if (__le32_to_cpu(dl->disk.status) & FAILED_DISK) + dl->index = -2; + } + if (alloc) super->disks = dl; @@ -967,14 +1148,45 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src) memcpy(dest, src, sizeof_imsm_dev(src, 0)); } -static void dup_map(struct imsm_dev *dev) +#ifndef MDASSEMBLE +/* When migrating map0 contains the 'destination' state while map1 + * contains the current state. When not migrating map0 contains the + * current state. This routine assumes that map[0].map_state is set to + * the current array state before being called. + * + * Migration is indicated by one of the following states + * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed) + * 2/ Initialize (migr_state=1 migr_type=0 map0state=normal + * map1state=unitialized) + * 3/ Verify (Resync) (migr_state=1 migr_type=1 map0state=normal + * map1state=normal) + * 4/ Rebuild (migr_state=1 migr_type=1 map0state=normal + * map1state=degraded) + */ +static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync) { - struct imsm_map *dest = get_imsm_map(dev, 1); + struct imsm_map *dest; struct imsm_map *src = get_imsm_map(dev, 0); + dev->vol.migr_state = 1; + dev->vol.migr_type = rebuild_resync; + dev->vol.curr_migr_unit = 0; + dest = get_imsm_map(dev, 1); + memcpy(dest, src, sizeof_imsm_map(src)); + src->map_state = to_state; } +static void end_migration(struct imsm_dev *dev, __u8 map_state) +{ + struct imsm_map *map = get_imsm_map(dev, 0); + + dev->vol.migr_state = 0; + dev->vol.curr_migr_unit = 0; + map->map_state = map_state; +} +#endif + static int parse_raid_devices(struct intel_super *super) { int i; @@ -1150,12 +1362,19 @@ static void __free_imsm_disk(struct dl *d) } static void free_imsm_disks(struct intel_super *super) { - while (super->disks) { - struct dl *d = super->disks; + struct dl *d; + while (super->disks) { + d = super->disks; super->disks = d->next; __free_imsm_disk(d); } + while (super->missing) { + d = super->missing; + super->missing = d->next; + __free_imsm_disk(d); + } + } /* free all the pieces hanging off of a super pointer */ @@ -1207,6 +1426,49 @@ static struct intel_super *alloc_super(int creating_imsm) } #ifndef MDASSEMBLE +/* find_missing - helper routine for load_super_imsm_all that identifies + * disks that have disappeared from the system. This routine relies on + * the mpb being uptodate, which it is at load time. + */ +static int find_missing(struct intel_super *super) +{ + int i; + struct imsm_super *mpb = super->anchor; + struct dl *dl; + struct imsm_disk *disk; + __u32 status; + + for (i = 0; i < mpb->num_disks; i++) { + disk = __get_imsm_disk(mpb, i); + for (dl = super->disks; dl; dl = dl->next) + if (serialcmp(dl->disk.serial, disk->serial) == 0) + break; + if (dl) + continue; + /* ok we have a 'disk' without a live entry in + * super->disks + */ + status = __le32_to_cpu(disk->status); + if (status & FAILED_DISK || !(status & USABLE_DISK)) + continue; /* never mind, already marked */ + + dl = malloc(sizeof(*dl)); + if (!dl) + return 1; + dl->major = 0; + dl->minor = 0; + dl->fd = -1; + dl->devname = strdup("missing"); + dl->index = i; + serialcpy(dl->serial, disk->serial); + dl->disk = *disk; + dl->next = super->missing; + super->missing = dl; + } + + return 0; +} + static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, char *devname, int keep_fd) { @@ -1291,6 +1553,12 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, close(dfd); } + + if (find_missing(super) != 0) { + free_imsm(super); + return 2; + } + if (st->subarray[0]) { if (atoi(st->subarray) <= super->anchor->num_raid_devs) super->current_vol = atoi(st->subarray); @@ -1305,6 +1573,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } + st->loaded_container = 1; return 0; } @@ -1347,6 +1616,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } + st->loaded_container = 0; return 0; } @@ -1441,6 +1711,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol->migr_state = 0; vol->migr_type = 0; vol->dirty = 0; + vol->curr_migr_unit = 0; for (i = 0; i < idx; i++) { struct imsm_dev *prev = get_imsm_dev(super, i); struct imsm_map *pmap = get_imsm_map(prev, 0); @@ -1521,6 +1792,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, return 1; } +#ifndef MDASSEMBLE static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -1603,7 +1875,7 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, get_dev_size(fd, NULL, &size); size /= 512; status = USABLE_DISK | SPARE_DISK; - strcpy((char *) dd->disk.serial, (char *) dd->serial); + serialcpy(dd->disk.serial, dd->serial); dd->disk.total_blocks = __cpu_to_le32(size); dd->disk.status = __cpu_to_le32(status); if (sysfs_disk_to_scsi_id(fd, &id) == 0) @@ -1679,14 +1951,15 @@ static int write_super_imsm(struct intel_super *super, int doclose) generation++; mpb->generation_num = __cpu_to_le32(generation); + mpb_size += sizeof(struct imsm_disk) * mpb->num_disks; for (d = super->disks; d; d = d->next) { if (d->index == -1) spares++; - else { + else mpb->disk[d->index] = d->disk; - mpb_size += sizeof(struct imsm_disk); - } } + for (d = super->missing; d; d = d->next) + mpb->disk[d->index] = d->disk; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = __get_imsm_dev(mpb, i); @@ -1720,6 +1993,7 @@ static int write_super_imsm(struct intel_super *super, int doclose) return 0; } + static int create_array(struct supertype *st) { size_t len; @@ -1743,7 +2017,7 @@ static int create_array(struct supertype *st) return 0; } -static int add_disk(struct supertype *st) +static int _add_disk(struct supertype *st) { struct intel_super *super = st->sb; size_t len; @@ -1780,7 +2054,7 @@ static int write_init_super_imsm(struct supertype *st) /* in the add disk case we are running in mdmon * context, so don't close fd's */ - return add_disk(st); + return _add_disk(st); } else rv = create_array(st); @@ -1793,6 +2067,7 @@ static int write_init_super_imsm(struct supertype *st) } else return write_super_imsm(st->sb, 1); } +#endif static int store_zero_imsm(struct supertype *st, int fd) { @@ -1814,6 +2089,12 @@ static int store_zero_imsm(struct supertype *st, int fd) return 0; } +static int imsm_bbm_log_size(struct imsm_super *mpb) +{ + return __le32_to_cpu(mpb->bbm_log_size); +} + +#ifndef MDASSEMBLE static int validate_geometry_imsm_container(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, @@ -1955,11 +2236,6 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 1; } -int imsm_bbm_log_size(struct imsm_super *mpb) -{ - return __le32_to_cpu(mpb->bbm_log_size); -} - static int validate_geometry_imsm(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, unsigned long long *freesize, @@ -2047,6 +2323,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, return 1; } +#endif /* MDASSEMBLE */ static struct mdinfo *container_content_imsm(struct supertype *st) { @@ -2072,7 +2349,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_vol *vol = &dev->vol; struct imsm_map *map = get_imsm_map(dev, 0); struct mdinfo *this; int slot; @@ -2081,32 +2357,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st) memset(this, 0, sizeof(*this)); this->next = rest; - this->array.level = get_imsm_raid_level(map); - this->array.raid_disks = map->num_members; - this->array.layout = imsm_level_to_layout(this->array.level); - this->array.md_minor = -1; - this->array.ctime = 0; - this->array.utime = 0; - this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; - this->array.state = !vol->dirty; - this->container_member = i; - if (map->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty || dev->vol.migr_state) - this->resync_start = 0; - else - this->resync_start = ~0ULL; - - strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); - this->name[MAX_RAID_SERIAL_LEN] = 0; - - sprintf(this->text_version, "/%s/%d", - devnum2devname(st->container_dev), - this->container_member); - - memset(this->uuid, 0, sizeof(this->uuid)); - - this->component_size = __le32_to_cpu(map->blocks_per_member); - + super->current_vol = i; + getinfo_super_imsm_volume(st, this); for (slot = 0 ; slot < map->num_members; slot++) { struct mdinfo *info_d; struct dl *d; @@ -2176,6 +2428,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) } +#ifndef MDASSEMBLE static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { @@ -2214,30 +2467,36 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, case 10: { /** - * check to see if any mirrors have failed, - * otherwise we are degraded + * check to see if any mirrors have failed, otherwise we + * are degraded. Even numbered slots are mirrored on + * slot+1 */ - int device_per_mirror = 2; /* FIXME is this always the case? - * and are they always adjacent? - */ - int r10fail = 0; int i; + /* gcc -Os complains that this is unused */ + int insync = insync; for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i); - struct imsm_disk *disk = get_imsm_disk(super, idx); + __u32 ord = get_imsm_ord_tbl_ent(dev, i); + int idx = ord_to_idx(ord); + struct imsm_disk *disk; - if (!disk) - r10fail++; - else if (__le32_to_cpu(disk->status) & FAILED_DISK) - r10fail++; + /* reset the potential in-sync count on even-numbered + * slots. num_copies is always 2 for imsm raid10 + */ + if ((i & 1) == 0) + insync = 2; - if (r10fail >= device_per_mirror) - return IMSM_T_STATE_FAILED; + disk = get_imsm_disk(super, idx); + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK || + ord & IMSM_ORD_REBUILD) + insync--; - /* reset 'r10fail' for next mirror set */ - if (!((i + 1) % device_per_mirror)) - r10fail = 0; + /* no in-sync disks left in this mirror the + * array has failed + */ + if (insync == 0) + return IMSM_T_STATE_FAILED; } return IMSM_T_STATE_DEGRADED; @@ -2276,56 +2535,119 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) return failed; } +static int is_resyncing(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 1; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_NORMAL) + return 1; + else + return 0; +} + +static int is_rebuilding(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 0; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_DEGRADED) + return 1; + else + return 0; +} + +static void mark_failure(struct imsm_disk *disk) +{ + __u32 status = __le32_to_cpu(disk->status); + + if (status & FAILED_DISK) + return; + status |= FAILED_DISK; + disk->status = __cpu_to_le32(status); + disk->scsi_id = __cpu_to_le32(~(__u32)0); + memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); +} + +/* Handle dirty -> clean transititions and resync. Degraded and rebuild + * states are handled in imsm_set_disk() with one exception, when a + * resync is stopped due to a new failure this routine will set the + * 'degraded' state for the array. + */ static int imsm_set_array_state(struct active_array *a, int consistent) { int inst = a->info.container_member; struct intel_super *super = a->container->sb; struct imsm_dev *dev = get_imsm_dev(super, inst); struct imsm_map *map = get_imsm_map(dev, 0); - int dirty = !consistent; - int failed; - __u8 map_state; + int failed = imsm_count_failed(super, dev); + __u8 map_state = imsm_check_degraded(super, dev, failed); - failed = imsm_count_failed(super, dev); - map_state = imsm_check_degraded(super, dev, failed); + /* before we activate this array handle any missing disks */ + if (consistent == 2 && super->missing) { + struct dl *dl; - if (consistent && !dev->vol.dirty && - (dev->vol.migr_state || map_state != IMSM_T_STATE_NORMAL)) - a->resync_start = 0ULL; - if (consistent == 2 && a->resync_start != ~0ULL) + dprintf("imsm: mark missing\n"); + end_migration(dev, map_state); + for (dl = super->missing; dl; dl = dl->next) + mark_failure(&dl->disk); + super->updates_pending++; + } + + if (consistent == 2 && + (!is_resync_complete(a) || + map_state != IMSM_T_STATE_NORMAL || + dev->vol.migr_state)) consistent = 0; - if (a->resync_start == ~0ULL) { - /* complete recovery or initial resync */ - if (map->map_state != map_state) { - dprintf("imsm: map_state %d: %d\n", - inst, map_state); - map->map_state = map_state; - super->updates_pending++; - } - if (dev->vol.migr_state) { - dprintf("imsm: mark resync complete\n"); - dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + if (is_resync_complete(a)) { + /* complete intialization / resync, + * recovery is completed in ->set_disk + */ + if (is_resyncing(dev)) { + dprintf("imsm: mark resync done\n"); + end_migration(dev, map_state); super->updates_pending++; } - } else if (!dev->vol.migr_state) { - dprintf("imsm: mark '%s' (%llu)\n", - failed ? "rebuild" : "initializing", a->resync_start); - /* mark that we are rebuilding */ - map->map_state = failed ? map_state : IMSM_T_STATE_NORMAL; - dev->vol.migr_state = 1; - dev->vol.migr_type = failed ? 1 : 0; - dup_map(dev); - a->check_degraded = 1; + } else if (!is_resyncing(dev) && !failed) { + /* mark the start of the init process if nothing is failed */ + dprintf("imsm: mark resync start (%llu)\n", a->resync_start); + map->map_state = map_state; + migrate(dev, IMSM_T_STATE_NORMAL, + map->map_state == IMSM_T_STATE_NORMAL); + super->updates_pending++; + } + + /* check if we can update the migration checkpoint */ + if (dev->vol.migr_state && + __le32_to_cpu(dev->vol.curr_migr_unit) != a->resync_start) { + dprintf("imsm: checkpoint migration (%llu)\n", a->resync_start); + dev->vol.curr_migr_unit = __cpu_to_le32(a->resync_start); super->updates_pending++; } /* mark dirty / clean */ - if (dirty != dev->vol.dirty) { + if (dev->vol.dirty != !consistent) { dprintf("imsm: mark '%s' (%llu)\n", - dirty ? "dirty" : "clean", a->resync_start); - dev->vol.dirty = dirty; + consistent ? "clean" : "dirty", a->resync_start); + if (consistent) + dev->vol.dirty = 0; + else + dev->vol.dirty = 1; super->updates_pending++; } return consistent; @@ -2338,10 +2660,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state) struct imsm_dev *dev = get_imsm_dev(super, inst); struct imsm_map *map = get_imsm_map(dev, 0); struct imsm_disk *disk; + int failed; __u32 status; - int failed = 0; - int new_failure = 0; __u32 ord; + __u8 map_state; if (n > map->num_members) fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n", @@ -2358,13 +2680,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state) /* check for new failures */ status = __le32_to_cpu(disk->status); if ((state & DS_FAULTY) && !(status & FAILED_DISK)) { - status |= FAILED_DISK; - disk->status = __cpu_to_le32(status); - disk->scsi_id = __cpu_to_le32(~0UL); - memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); - new_failure = 1; + mark_failure(disk); super->updates_pending++; } + /* check if in_sync */ if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) { struct imsm_map *migr_map = get_imsm_map(dev, 1); @@ -2373,29 +2692,24 @@ static void imsm_set_disk(struct active_array *a, int n, int state) super->updates_pending++; } - /* the number of failures have changed, count up 'failed' to determine - * degraded / failed status - */ - if (new_failure && map->map_state != IMSM_T_STATE_FAILED) - failed = imsm_count_failed(super, dev); - - /* determine map_state based on failed or in_sync count */ - if (failed) - map->map_state = imsm_check_degraded(super, dev, failed); - else if (map->map_state == IMSM_T_STATE_DEGRADED) { - struct mdinfo *d; - int working = 0; - - for (d = a->info.devs ; d ; d = d->next) - if (d->curr_state & DS_INSYNC) - working++; + failed = imsm_count_failed(super, dev); + map_state = imsm_check_degraded(super, dev, failed); - if (working == a->info.array.raid_disks) { - map->map_state = IMSM_T_STATE_NORMAL; - dev->vol.migr_state = 0; - dev->vol.migr_type = 0; - super->updates_pending++; - } + /* check if recovery complete, newly degraded, or failed */ + if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { + end_migration(dev, map_state); + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_DEGRADED && + map->map_state != map_state && + !dev->vol.migr_state) { + dprintf("imsm: mark degraded\n"); + map->map_state = map_state; + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_FAILED && + map->map_state != map_state) { + dprintf("imsm: mark failed\n"); + end_migration(dev, map_state); + super->updates_pending++; } } @@ -2461,9 +2775,10 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a return dl; } -static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct active_array *a) +static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); + int idx = get_imsm_disk_idx(dev, slot); struct imsm_map *map = get_imsm_map(dev, 0); unsigned long long esize; unsigned long long pos; @@ -2478,7 +2793,8 @@ static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct acti for (dl = super->disks; dl; dl = dl->next) { /* If in this array, skip */ for (d = a->info.devs ; d ; d = d->next) - if (d->disk.major == dl->major && + if (d->state_fd >= 0 && + d->disk.major == dl->major && d->disk.minor == dl->minor) { dprintf("%x:%x already in array\n", dl->major, dl->minor); break; @@ -2486,13 +2802,13 @@ static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct acti if (d) continue; - /* skip marked in use or failed drives */ + /* skip in use or failed drives */ status = __le32_to_cpu(dl->disk.status); - if (status & FAILED_DISK || status & CONFIGURED_DISK) { + if (status & FAILED_DISK || idx == dl->index) { dprintf("%x:%x status ( %s%s)\n", dl->major, dl->minor, status & FAILED_DISK ? "failed " : "", - status & CONFIGURED_DISK ? "configured " : ""); + idx == dl->index ? "in use " : ""); continue; } @@ -2602,6 +2918,8 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, /* found a usable disk with enough space */ di = malloc(sizeof(*di)); + if (!di) + continue; memset(di, 0, sizeof(*di)); /* dl->index will be -1 in the case we are activating a @@ -2641,7 +2959,23 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, * disk_ord_tbl for the array */ mu = malloc(sizeof(*mu)); - mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares); + if (mu) { + mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares); + if (mu->buf == NULL) { + free(mu); + mu = NULL; + } + } + if (!mu) { + while (rv) { + struct mdinfo *n = rv->next; + + free(rv); + rv = n; + } + return NULL; + } + mu->space = NULL; mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; mu->next = *updates; @@ -2720,12 +3054,15 @@ static void imsm_process_update(struct supertype *st, struct imsm_update_activate_spare *u = (void *) update->buf; struct imsm_dev *dev = get_imsm_dev(super, u->array); struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *migr_map; struct active_array *a; struct imsm_disk *disk; __u32 status; + __u8 to_state; struct dl *dl; unsigned int found; - int victim; + int failed; + int victim = get_imsm_disk_idx(dev, u->slot); int i; for (dl = super->disks; dl; dl = dl->next) @@ -2734,26 +3071,45 @@ static void imsm_process_update(struct supertype *st, if (!dl) { fprintf(stderr, "error: imsm_activate_spare passed " - "an unknown disk (index: %d serial: %s)\n", - u->dl->index, u->dl->serial); + "an unknown disk (index: %d)\n", + u->dl->index); return; } super->updates_pending++; + /* count failures (excluding rebuilds and the victim) + * to determine map[0] state + */ + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK) + failed++; + } + /* adding a pristine spare, assign a new index */ if (dl->index < 0) { dl->index = super->anchor->num_disks; super->anchor->num_disks++; } - victim = get_imsm_disk_idx(dev, u->slot); - set_imsm_ord_tbl_ent(map, u->slot, dl->index); disk = &dl->disk; status = __le32_to_cpu(disk->status); status |= CONFIGURED_DISK; status &= ~SPARE_DISK; disk->status = __cpu_to_le32(status); + /* mark rebuild */ + to_state = imsm_check_degraded(super, dev, failed); + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, to_state, 1); + migr_map = get_imsm_map(dev, 1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); + /* count arrays using the victim in the metadata */ found = 0; for (a = st->arrays; a ; a = a->next) { @@ -2769,12 +3125,18 @@ static void imsm_process_update(struct supertype *st, if (!found) { struct dl **dlp; + /* We know that 'manager' isn't touching anything, + * so it is safe to delete + */ for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) if ((*dlp)->index == victim) break; - /* We know that 'manager' isn't touching anything, - * so it is safe to: - */ + + /* victim may be on the missing list */ + if (!*dlp) + for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; imsm_delete(super, dlp, victim); } break; @@ -2872,27 +3234,21 @@ static void imsm_process_update(struct supertype *st, * being added */ if (super->add) { struct active_array *a; + + super->updates_pending++; for (a = st->arrays; a; a = a->next) a->check_degraded = 1; } - /* check if we can add / replace some disks in the - * metadata */ + /* add some spares to the metadata */ while (super->add) { - struct dl **dlp, *dl, *al; + struct dl *al; + al = super->add; super->add = al->next; - for (dlp = &super->disks; *dlp ; ) { - if (memcmp(al->serial, (*dlp)->serial, - MAX_RAID_SERIAL_LEN) == 0) { - dl = *dlp; - *dlp = (*dlp)->next; - __free_imsm_disk(dl); - break; - } else - dlp = &(*dlp)->next; - } al->next = super->disks; super->disks = al; + dprintf("%s: added %x:%x\n", + __func__, al->major, al->minor); } break; @@ -2965,6 +3321,9 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, int index) for (iter = super->disks; iter; iter = iter->next) if (iter->index > index) iter->index--; + for (iter = super->missing; iter; iter = iter->next) + if (iter->index > index) + iter->index--; for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); @@ -2996,6 +3355,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, int index) __free_imsm_disk(dl); } } +#endif /* MDASSEMBLE */ struct superswitch super_imsm = { #ifndef MDASSEMBLE @@ -3004,6 +3364,8 @@ struct superswitch super_imsm = { .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm, .write_init_super = write_init_super_imsm, + .validate_geometry = validate_geometry_imsm, + .add_to_super = add_to_super_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -3016,15 +3378,14 @@ struct superswitch super_imsm = { .load_super = load_super_imsm, .init_super = init_super_imsm, - .add_to_super = add_to_super_imsm, .store_super = store_zero_imsm, .free_super = free_super_imsm, .match_metadata_desc = match_metadata_desc_imsm, .container_content = container_content_imsm, - .validate_geometry = validate_geometry_imsm, .external = 1, +#ifndef MDASSEMBLE /* for mdmon */ .open_new = imsm_open_new, .load_super = load_super_imsm, @@ -3034,4 +3395,5 @@ struct superswitch super_imsm = { .activate_spare = imsm_activate_spare, .process_update = imsm_process_update, .prepare_update = imsm_prepare_update, +#endif /* MDASSEMBLE */ };