X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=super-intel.c;h=3249b2ccbd6c59f66646c389709737f98e3be686;hp=68674e5c0c6b82a79fae944625d99425a609b8c4;hb=ff54de6e47163944185f231700e72d3122b58f4c;hpb=b8ac1967953fea50f0ec1e1650a3a9f52e6525d8 diff --git a/super-intel.c b/super-intel.c index 68674e5c..3249b2cc 100644 --- a/super-intel.c +++ b/super-intel.c @@ -17,8 +17,10 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define HAVE_STDINT_H 1 #include "mdadm.h" #include "mdmon.h" +#include "sha1.h" #include #include #include @@ -68,8 +70,10 @@ struct imsm_map { __u8 num_members; /* number of member disks */ __u8 reserved[3]; __u32 filler[7]; /* expansion area */ +#define IMSM_ORD_REBUILD (1 << 24) __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members], - top byte special */ + * top byte contains some flags + */ } __attribute__ ((packed)); struct imsm_vol { @@ -100,16 +104,43 @@ struct imsm_super { __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */ __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */ __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */ - __u32 reserved[2]; /* 0x30 - 0x37 */ + __u32 error_log_size; /* 0x30 - 0x33 in bytes */ + __u32 attributes; /* 0x34 - 0x37 */ __u8 num_disks; /* 0x38 Number of configured disks */ __u8 num_raid_devs; /* 0x39 Number of configured volumes */ - __u8 fill[2]; /* 0x3A - 0x3B */ -#define IMSM_FILLERS 39 - __u32 filler[IMSM_FILLERS]; /* 0x3C - 0xD7 RAID_MPB_FILLERS */ + __u8 error_log_pos; /* 0x3A */ + __u8 fill[1]; /* 0x3B */ + __u32 cache_size; /* 0x3c - 0x40 in mb */ + __u32 orig_family_num; /* 0x40 - 0x43 original family num */ + __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */ + __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */ +#define IMSM_FILLERS 35 + __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */ struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */ /* here comes imsm_dev[num_raid_devs] */ + /* here comes BBM logs */ } __attribute__ ((packed)); +#define BBM_LOG_MAX_ENTRIES 254 + +struct bbm_log_entry { + __u64 defective_block_start; +#define UNREADABLE 0xFFFFFFFF + __u32 spare_block_offset; + __u16 remapped_marked_count; + __u16 disk_ordinal; +} __attribute__ ((__packed__)); + +struct bbm_log { + __u32 signature; /* 0xABADB10C */ + __u32 entry_count; + __u32 reserved_spare_block_count; /* 0 */ + __u32 reserved; /* 0xFFFF */ + __u64 first_spare_lba; + struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES]; +} __attribute__ ((__packed__)); + + #ifndef MDASSEMBLE static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; #endif @@ -127,76 +158,123 @@ static unsigned int mpb_sectors(struct imsm_super *mpb) /* internal representation of IMSM metadata */ struct intel_super { union { - struct imsm_super *mpb; - void *buf; + void *buf; /* O_DIRECT buffer for reading/writing metadata */ + struct imsm_super *anchor; /* immovable parameters */ }; + size_t len; /* size of the 'buf' allocation */ + void *next_buf; /* for realloc'ing buf from the manager */ + size_t next_len; int updates_pending; /* count of pending updates for mdmon */ int creating_imsm; /* flag to indicate container creation */ - int creating_dev; /* index of raid device undergoing creation */ + int current_vol; /* index of raid device undergoing creation */ + #define IMSM_MAX_RAID_DEVS 2 + struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS]; struct dl { struct dl *next; int index; __u8 serial[MAX_RAID_SERIAL_LEN]; int major, minor; char *devname; + struct imsm_disk disk; int fd; } *disks; + struct dl *add; /* list of disks to add while mdmon active */ + struct bbm_log *bbm_log; }; struct extent { unsigned long long start, size; }; -static struct supertype *match_metadata_desc_imsm(char *arg) +/* definition of messages passed to imsm_process_update */ +enum imsm_update_type { + update_activate_spare, + update_create_array, + update_add_disk, +}; + +struct imsm_update_activate_spare { + enum imsm_update_type type; + struct dl *dl; + int slot; + int array; + struct imsm_update_activate_spare *next; +}; + +struct imsm_update_create_array { + enum imsm_update_type type; + int dev_idx; + struct imsm_dev dev; +}; + +struct imsm_update_add_disk { + enum imsm_update_type type; +}; + +static int imsm_env_devname_as_serial(void) { - struct supertype *st; + char *val = getenv("IMSM_DEVNAME_AS_SERIAL"); - if (strcmp(arg, "imsm") != 0 && - strcmp(arg, "default") != 0 - ) - return NULL; + if (val && atoi(val) == 1) + return 1; - st = malloc(sizeof(*st)); - memset(st, 0, sizeof(*st)); - st->ss = &super_imsm; - st->max_devs = IMSM_MAX_DEVICES; - st->minor_version = 0; - st->sb = NULL; - return st; + return 0; } -static struct supertype *match_metadata_desc_imsm_volume(char *arg) + +static struct supertype *match_metadata_desc_imsm(char *arg) { struct supertype *st; - if (strcmp(arg, "imsm/volume") != 0 && - strcmp(arg, "raid") != 0 && + if (strcmp(arg, "imsm") != 0 && strcmp(arg, "default") != 0 ) return NULL; st = malloc(sizeof(*st)); memset(st, 0, sizeof(*st)); - st->ss = &super_imsm_volume; + st->ss = &super_imsm; st->max_devs = IMSM_MAX_DEVICES; st->minor_version = 0; st->sb = NULL; return st; } +#ifndef MDASSEMBLE static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } +#endif -static struct imsm_disk *get_imsm_disk(struct imsm_super *mpb, __u8 index) +/* retrieve a disk directly from the anchor when the anchor is known to be + * up-to-date, currently only at load time + */ +static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) { - if (index > mpb->num_disks - 1) + if (index >= mpb->num_disks) return NULL; return &mpb->disk[index]; } -static __u32 gen_imsm_checksum(struct imsm_super *mpb) +#ifndef MDASSEMBLE +/* retrieve a disk from the parsed metadata */ +static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) +{ + struct dl *d; + + for (d = super->disks; d; d = d->next) + if (d->index == index) + return &d->disk; + + return NULL; +} +#endif + +/* generate a checksum directly from the anchor when the anchor is known to be + * up-to-date, currently only at load or write_super after coalescing + */ +static __u32 __gen_imsm_checksum(struct imsm_super *mpb) { __u32 end = mpb->mpb_size / sizeof(end); __u32 *p = (__u32 *) mpb; @@ -208,29 +286,50 @@ static __u32 gen_imsm_checksum(struct imsm_super *mpb) return sum - __le32_to_cpu(mpb->check_sum); } -static size_t sizeof_imsm_dev(struct imsm_dev *dev) +static size_t sizeof_imsm_map(struct imsm_map *map) +{ + return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1); +} + +struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map) { - size_t size = sizeof(*dev); + struct imsm_map *map = &dev->vol.map[0]; + + if (second_map && !dev->vol.migr_state) + return NULL; + else if (second_map) { + void *ptr = map; - /* each map has disk_ord_tbl[num_members - 1] additional space */ - size += sizeof(__u32) * (dev->vol.map[0].num_members - 1); + return ptr + sizeof_imsm_map(map); + } else + return map; + +} + +/* return the size of the device. + * migr_state increases the returned size if map[0] were to be duplicated + */ +static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state) +{ + size_t size = sizeof(*dev) - sizeof(struct imsm_map) + + sizeof_imsm_map(get_imsm_map(dev, 0)); /* migrating means an additional map */ - if (dev->vol.migr_state) { - size += sizeof(struct imsm_map); - size += sizeof(__u32) * (dev->vol.map[1].num_members - 1); - } + if (dev->vol.migr_state) + size += sizeof_imsm_map(get_imsm_map(dev, 1)); + else if (migr_state) + size += sizeof_imsm_map(get_imsm_map(dev, 0)); return size; } -static struct imsm_dev *get_imsm_dev(struct imsm_super *mpb, __u8 index) +static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index) { int offset; int i; void *_mpb = mpb; - if (index > mpb->num_raid_devs - 1) + if (index >= mpb->num_raid_devs) return NULL; /* devices start after all disks */ @@ -240,17 +339,42 @@ static struct imsm_dev *get_imsm_dev(struct imsm_super *mpb, __u8 index) if (i == index) return _mpb + offset; else - offset += sizeof_imsm_dev(_mpb + offset); + offset += sizeof_imsm_dev(_mpb + offset, 0); return NULL; } -static __u32 get_imsm_disk_idx(struct imsm_map *map, int slot) +static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) +{ + if (index >= super->anchor->num_raid_devs) + return NULL; + return super->dev_tbl[index]; +} + +static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) +{ + struct imsm_map *map; + + if (dev->vol.migr_state) + map = get_imsm_map(dev, 1); + else + map = get_imsm_map(dev, 0); + + /* top byte identifies disk under rebuild */ + return __le32_to_cpu(map->disk_ord_tbl[slot]); +} + +#define ord_to_idx(ord) (((ord) << 8) >> 8) +static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot) { - __u32 *ord_tbl = &map->disk_ord_tbl[slot]; + __u32 ord = get_imsm_ord_tbl_ent(dev, slot); + + return ord_to_idx(ord); +} - /* top byte is 'special' */ - return __le32_to_cpu(*ord_tbl & ~(0xff << 24)); +static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord) +{ + map->disk_ord_tbl[slot] = __cpu_to_le32(ord); } static int get_imsm_raid_level(struct imsm_map *map) @@ -265,6 +389,7 @@ static int get_imsm_raid_level(struct imsm_map *map) return map->raid_level; } +#ifndef MDASSEMBLE static int cmp_extent(const void *av, const void *bv) { const struct extent *a = av; @@ -279,22 +404,16 @@ static int cmp_extent(const void *av, const void *bv) static struct extent *get_extents(struct intel_super *super, struct dl *dl) { /* find a list of used extents on the given physical device */ - struct imsm_super *mpb = super->mpb; - struct imsm_disk *disk; struct extent *rv, *e; int i, j; int memberships = 0; - disk = get_imsm_disk(mpb, dl->index); - if (!disk) - return NULL; - - for (i = 0; i < mpb->num_raid_devs; i++) { - struct imsm_dev *dev = get_imsm_dev(mpb, i); - struct imsm_map *map = dev->vol.map; + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + struct imsm_map *map = get_imsm_map(dev, 0); for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(map, j); + __u32 index = get_imsm_disk_idx(dev, j); if (index == dl->index) memberships++; @@ -305,12 +424,12 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) return NULL; e = rv; - for (i = 0; i < mpb->num_raid_devs; i++) { - struct imsm_dev *dev = get_imsm_dev(mpb, i); - struct imsm_map *map = dev->vol.map; + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + struct imsm_map *map = get_imsm_map(dev, 0); for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(map, j); + __u32 index = get_imsm_disk_idx(dev, j); if (index == dl->index) { e->start = __le32_to_cpu(map->pba_of_lba0); @@ -321,29 +440,31 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) } qsort(rv, memberships, sizeof(*rv), cmp_extent); - e->start = __le32_to_cpu(disk->total_blocks) - + e->start = __le32_to_cpu(dl->disk.total_blocks) - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); e->size = 0; return rv; } -#ifndef MDASSEMBLE static void print_imsm_dev(struct imsm_dev *dev, int index) { __u64 sz; int slot; - struct imsm_map *map = dev->vol.map; + struct imsm_map *map = get_imsm_map(dev, 0); + __u32 ord; printf("\n"); printf("[%s]:\n", dev->volume); printf(" RAID Level : %d\n", get_imsm_raid_level(map)); printf(" Members : %d\n", map->num_members); for (slot = 0; slot < map->num_members; slot++) - if (index == get_imsm_disk_idx(map, slot)) + if (index == get_imsm_disk_idx(dev, slot)) break; - if (slot < map->num_members) - printf(" This Slot : %d\n", slot); - else + if (slot < map->num_members) { + ord = get_imsm_ord_tbl_ent(dev, slot); + printf(" This Slot : %d%s\n", slot, + ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); + } else printf(" This Slot : ?\n"); sz = __le32_to_cpu(dev->size_high); sz <<= 32; @@ -360,20 +481,31 @@ static void print_imsm_dev(struct imsm_dev *dev, int index) printf(" Chunk Size : %u KiB\n", __le16_to_cpu(map->blocks_per_strip) / 2); printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks)); - printf(" Migrate State : %s\n", dev->vol.migr_state ? "migrating" : "idle"); + printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle"); + if (dev->vol.migr_state) + printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing"); + printf("\n"); + printf(" Map State : %s", map_state_str[map->map_state]); + if (dev->vol.migr_state) { + struct imsm_map *map = get_imsm_map(dev, 1); + printf(" <-- %s", map_state_str[map->map_state]); + } + printf("\n"); printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); - printf(" Map State : %s\n", map_state_str[map->map_state]); } static void print_imsm_disk(struct imsm_super *mpb, int index) { - struct imsm_disk *disk = get_imsm_disk(mpb, index); - char str[MAX_RAID_SERIAL_LEN]; + struct imsm_disk *disk = __get_imsm_disk(mpb, index); + char str[MAX_RAID_SERIAL_LEN + 1]; __u32 s; __u64 sz; + if (index < 0) + return; + printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN, "%s", disk->serial); + snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); printf(" Disk%02d Serial : %s\n", index, str); s = __le32_to_cpu(disk->status); printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "", @@ -390,7 +522,7 @@ static void print_imsm_disk(struct imsm_super *mpb, int index) static void examine_super_imsm(struct supertype *st, char *homehost) { struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; + struct imsm_super *mpb = super->anchor; char str[MAX_SIGNATURE_LENGTH]; int i; __u32 sum; @@ -403,13 +535,24 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); sum = __le32_to_cpu(mpb->check_sum); printf(" Checksum : %08x %s\n", sum, - gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect"); + __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect"); printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); print_imsm_disk(mpb, super->disks->index); + if (super->bbm_log) { + struct bbm_log *log = super->bbm_log; + + printf("\n"); + printf("Bad Block Management Log:\n"); + printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size)); + printf(" Signature : %x\n", __le32_to_cpu(log->signature)); + printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count)); + printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count)); + printf(" First Spare : %llx\n", __le64_to_cpu(log->first_spare_lba)); + } for (i = 0; i < mpb->num_raid_devs; i++) - print_imsm_dev(get_imsm_dev(mpb, i), super->disks->index); + print_imsm_dev(__get_imsm_dev(mpb, i), super->disks->index); for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; @@ -417,13 +560,18 @@ static void examine_super_imsm(struct supertype *st, char *homehost) } } +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info); + static void brief_examine_super_imsm(struct supertype *st) { - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; + /* We just write a generic DDF ARRAY entry + */ + struct mdinfo info; + char nbuf[64]; - printf("ARRAY /dev/imsm family=%08x metadata=external:imsm\n", - __le32_to_cpu(mpb->family_num)); + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf("ARRAY /dev/imsm metadata=imsm UUID=%s\n", nbuf + 5); } static void detail_super_imsm(struct supertype *st, char *homehost) @@ -433,7 +581,11 @@ static void detail_super_imsm(struct supertype *st, char *homehost) static void brief_detail_super_imsm(struct supertype *st) { - printf("%s\n", __FUNCTION__); + struct mdinfo info; + char nbuf[64]; + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf,'-'); + printf(" UUID=%s", nbuf + 5); } #endif @@ -441,14 +593,54 @@ static int match_home_imsm(struct supertype *st, char *homehost) { printf("%s\n", __FUNCTION__); - return 0; + return -1; } static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) { - printf("%s\n", __FUNCTION__); + /* The uuid returned here is used for: + * uuid to put into bitmap file (Create, Grow) + * uuid for backup header when saving critical section (Grow) + * comparing uuids when re-adding a device into an array + * In these cases the uuid required is that of the data-array, + * not the device-set. + * uuid to recognise same set when adding a missing device back + * to an array. This is a uuid for the device-set. + * + * For each of these we can make do with a truncated + * or hashed uuid rather than the original, as long as + * everyone agrees. + * In each case the uuid required is that of the data-array, + * not the device-set. + */ + /* imsm does not track uuid's so we synthesis one using sha1 on + * - The signature (Which is constant for all imsm array, but no matter) + * - the family_num of the container + * - the index number of the volume + * - the 'serial' number of the volume. + * Hopefully these are all constant. + */ + struct intel_super *super = st->sb; + + char buf[20]; + struct sha1_ctx ctx; + struct imsm_dev *dev = NULL; + + sha1_init_ctx(&ctx); + sha1_process_bytes(super->anchor->sig, MAX_SIGNATURE_LENGTH, &ctx); + sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx); + if (super->current_vol >= 0) + dev = get_imsm_dev(super, super->current_vol); + if (dev) { + __u32 vol = super->current_vol; + sha1_process_bytes(&vol, sizeof(vol), &ctx); + sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx); + } + sha1_finish_ctx(&ctx, buf); + memcpy(uuid, buf, 4*4); } +#if 0 static void get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p) { @@ -474,6 +666,7 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p) *m = strtol(minor, NULL, 0); *p = strtol(patch, NULL, 0); } +#endif static int imsm_level_to_layout(int level) { @@ -483,24 +676,70 @@ static int imsm_level_to_layout(int level) return 0; case 5: case 6: - return ALGORITHM_LEFT_SYMMETRIC; + return ALGORITHM_LEFT_ASYMMETRIC; case 10: return 0x102; //FIXME is this correct? } return -1; } +static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + struct imsm_map *map = get_imsm_map(dev, 0); + + info->container_member = super->current_vol; + info->array.raid_disks = map->num_members; + info->array.level = get_imsm_raid_level(map); + info->array.layout = imsm_level_to_layout(info->array.level); + info->array.md_minor = -1; + info->array.ctime = 0; + info->array.utime = 0; + info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; + info->array.state = !dev->vol.dirty; + + info->disk.major = 0; + info->disk.minor = 0; + + info->data_offset = __le32_to_cpu(map->pba_of_lba0); + info->component_size = __le32_to_cpu(map->blocks_per_member); + memset(info->uuid, 0, sizeof(info->uuid)); + + if (map->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty || dev->vol.migr_state) + info->resync_start = 0; + else + info->resync_start = ~0ULL; + + strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); + info->name[MAX_RAID_SERIAL_LEN] = 0; + + info->array.major_version = -1; + info->array.minor_version = -2; + sprintf(info->text_version, "/%s/%d", + devnum2devname(st->container_dev), + info->container_member); + info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */ + uuid_from_super_imsm(st, info->uuid); +} + + static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; struct imsm_disk *disk; __u32 s; - info->array.major_version = 2000; - get_imsm_numerical_version(mpb, &info->array.minor_version, - &info->array.patch_version); - info->array.raid_disks = mpb->num_disks; + if (super->current_vol >= 0) { + getinfo_super_imsm_volume(st, info); + return; + } + + /* Set raid_disks to zero so that Assemble will always pull in valid + * spares + */ + info->array.raid_disks = 0; info->array.level = LEVEL_CONTAINER; info->array.layout = 0; info->array.md_minor = -1; @@ -512,48 +751,27 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->disk.minor = 0; info->disk.raid_disk = -1; info->reshape_active = 0; + info->array.major_version = -1; + info->array.minor_version = -2; strcpy(info->text_version, "imsm"); + info->safe_mode_delay = 0; info->disk.number = -1; info->disk.state = 0; + info->name[0] = 0; if (super->disks) { + disk = &super->disks->disk; info->disk.number = super->disks->index; info->disk.raid_disk = super->disks->index; - disk = get_imsm_disk(mpb, super->disks->index); + info->data_offset = __le32_to_cpu(disk->total_blocks) - + (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); + info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; s = __le32_to_cpu(disk->status); info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0; } -} - -static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) -{ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_dev *dev = get_imsm_dev(mpb, info->container_member); - struct imsm_map *map = &dev->vol.map[0]; - - info->array.major_version = 2000; - get_imsm_numerical_version(mpb, &info->array.minor_version, - &info->array.patch_version); - info->array.raid_disks = map->num_members; - info->array.level = get_imsm_raid_level(map); - info->array.layout = imsm_level_to_layout(info->array.level); - info->array.md_minor = -1; - info->array.ctime = 0; - info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512); - - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); - - info->disk.major = 0; - info->disk.minor = 0; - - sprintf(info->text_version, "/%s/%d", - devnum2devname(st->container_dev), - info->container_member); + uuid_from_super_imsm(st, info->uuid); } static int update_super_imsm(struct supertype *st, struct mdinfo *info, @@ -642,42 +860,75 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst) return 0; } - if (memcmp(first->mpb->sig, sec->mpb->sig, MAX_SIGNATURE_LENGTH) != 0) - return 3; - if (first->mpb->family_num != sec->mpb->family_num) - return 3; - if (first->mpb->mpb_size != sec->mpb->mpb_size) - return 3; - if (first->mpb->check_sum != sec->mpb->check_sum) + if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0) return 3; + /* if an anchor does not have num_raid_devs set then it is a free + * floating spare + */ + if (first->anchor->num_raid_devs > 0 && + sec->anchor->num_raid_devs > 0) { + if (first->anchor->family_num != sec->anchor->family_num) + return 3; + } + + /* if 'first' is a spare promote it to a populated mpb with sec's + * family number + */ + if (first->anchor->num_raid_devs == 0 && + sec->anchor->num_raid_devs > 0) { + first->anchor->num_raid_devs = sec->anchor->num_raid_devs; + first->anchor->family_num = sec->anchor->family_num; + } + return 0; } +static void fd2devname(int fd, char *name) +{ + struct stat st; + char path[256]; + char dname[100]; + char *nm; + int rv; + + name[0] = '\0'; + if (fstat(fd, &st) != 0) + return; + sprintf(path, "/sys/dev/block/%d:%d", + major(st.st_rdev), minor(st.st_rdev)); + + rv = readlink(path, dname, sizeof(dname)); + if (rv <= 0) + return; + + dname[rv] = '\0'; + nm = strrchr(dname, '/'); + nm++; + snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); +} + + extern int scsi_get_serial(int fd, void *buf, size_t buf_len); static int imsm_read_serial(int fd, char *devname, __u8 serial[MAX_RAID_SERIAL_LEN]) { unsigned char scsi_serial[255]; - int sg_fd; int rv; int rsp_len; - int i, cnt; + int len; + char *c, *rsp_buf; memset(scsi_serial, 0, sizeof(scsi_serial)); - sg_fd = sysfs_disk_to_sg(fd); - if (sg_fd < 0) { - if (devname) - fprintf(stderr, - Name ": Failed to open sg interface for %s: %s\n", - devname, strerror(errno)); - return 1; - } + rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); - rv = scsi_get_serial(sg_fd, scsi_serial, sizeof(scsi_serial)); - close(sg_fd); + if (rv && imsm_env_devname_as_serial()) { + memset(serial, 0, MAX_RAID_SERIAL_LEN); + fd2devname(fd, (char *) serial); + return 0; + } if (rv != 0) { if (devname) @@ -687,30 +938,62 @@ static int imsm_read_serial(int fd, char *devname, return rv; } + /* trim whitespace */ rsp_len = scsi_serial[3]; - for (i = 0, cnt = 0; i < rsp_len; i++) { - if (!isspace(scsi_serial[4 + i])) - serial[cnt++] = scsi_serial[4 + i]; - if (cnt == MAX_RAID_SERIAL_LEN) - break; - } - - serial[MAX_RAID_SERIAL_LEN - 1] = '\0'; + rsp_buf = (char *) &scsi_serial[4]; + c = rsp_buf; + while (isspace(*c)) + c++; + if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len) + len = rsp_len - (c - rsp_buf); + else + len = MAX_RAID_SERIAL_LEN; + memcpy(serial, c, len); + c = (char *) &serial[len - 1]; + while (isspace(*c) || *c == '\0') + *c-- = '\0'; return 0; } +static int serialcmp(__u8 *s1, __u8 *s2) +{ + return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN); +} + +static void serialcpy(__u8 *dest, __u8 *src) +{ + strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN); +} + static int load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) { - struct imsm_super *mpb = super->mpb; struct dl *dl; struct stat stb; - struct imsm_disk *disk; int rv; int i; + int alloc = 1; + __u8 serial[MAX_RAID_SERIAL_LEN]; + + rv = imsm_read_serial(fd, devname, serial); + + if (rv != 0) + return 2; + + /* check if this is a disk we have seen before. it may be a spare in + * super->disks while the current anchor believes it is a raid member, + * check if we need to update dl->index + */ + for (dl = super->disks; dl; dl = dl->next) + if (serialcmp(dl->serial, serial) == 0) + break; + + if (!dl) + dl = malloc(sizeof(*dl)); + else + alloc = 0; - dl = malloc(sizeof(*dl)); if (!dl) { if (devname) fprintf(stderr, @@ -718,50 +1001,155 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) devname); return 2; } - memset(dl, 0, sizeof(*dl)); - fstat(fd, &stb); - dl->major = major(stb.st_rdev); - dl->minor = minor(stb.st_rdev); - dl->next = super->disks; - dl->fd = keep_fd ? fd : -1; - dl->devname = devname ? strdup(devname) : NULL; - dl->index = -1; - super->disks = dl; - rv = imsm_read_serial(fd, devname, dl->serial); + if (alloc) { + fstat(fd, &stb); + dl->major = major(stb.st_rdev); + dl->minor = minor(stb.st_rdev); + dl->next = super->disks; + dl->fd = keep_fd ? fd : -1; + dl->devname = devname ? strdup(devname) : NULL; + serialcpy(dl->serial, serial); + dl->index = -2; + } else if (keep_fd) { + close(dl->fd); + dl->fd = fd; + } - if (rv != 0) - return 2; + /* look up this disk's index in the current anchor */ + for (i = 0; i < super->anchor->num_disks; i++) { + struct imsm_disk *disk_iter; - /* look up this disk's index */ - for (i = 0; i < mpb->num_disks; i++) { - disk = get_imsm_disk(mpb, i); + disk_iter = __get_imsm_disk(super->anchor, i); + + if (serialcmp(disk_iter->serial, dl->serial) == 0) { + __u32 status; + + dl->disk = *disk_iter; + status = __le32_to_cpu(dl->disk.status); + /* only set index on disks that are a member of a + * populated contianer, i.e. one with raid_devs + */ + if (status & FAILED_DISK) + dl->index = -2; + else if (status & SPARE_DISK) + dl->index = -1; + else + dl->index = i; - if (memcmp(disk->serial, dl->serial, MAX_RAID_SERIAL_LEN) == 0) break; + } } - if (i > mpb->num_disks) - return 2; + if (alloc) + super->disks = dl; + + return 0; +} + +static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src) +{ + memcpy(dest, src, sizeof_imsm_dev(src, 0)); +} + +#ifndef MDASSEMBLE +/* When migrating map0 contains the 'destination' state while map1 + * contains the current state. When not migrating map0 contains the + * current state. This routine assumes that map[0].map_state is set to + * the current array state before being called. + * + * Migration is indicated by one of the following states + * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed) + * 2/ Initialize (migr_state=1 migr_type=0 map0state=normal + * map1state=unitialized) + * 3/ Verify (Resync) (migr_state=1 migr_type=1 map0state=normal + * map1state=normal) + * 4/ Rebuild (migr_state=1 migr_type=1 map0state=normal + * map1state=degraded) + */ +static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync) +{ + struct imsm_map *dest; + struct imsm_map *src = get_imsm_map(dev, 0); + + dev->vol.migr_state = 1; + dev->vol.migr_type = rebuild_resync; + dest = get_imsm_map(dev, 1); + + memcpy(dest, src, sizeof_imsm_map(src)); + src->map_state = to_state; +} +#endif + +static int parse_raid_devices(struct intel_super *super) +{ + int i; + struct imsm_dev *dev_new; + size_t len, len_migr; + size_t space_needed = 0; + struct imsm_super *mpb = super->anchor; + + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i); + + len = sizeof_imsm_dev(dev_iter, 0); + len_migr = sizeof_imsm_dev(dev_iter, 1); + if (len_migr > len) + space_needed += len_migr - len; + + dev_new = malloc(len_migr); + if (!dev_new) + return 1; + imsm_copy_dev(dev_new, dev_iter); + super->dev_tbl[i] = dev_new; + } - dl->index = i; + /* ensure that super->buf is large enough when all raid devices + * are migrating + */ + if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) { + void *buf; + + len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512); + if (posix_memalign(&buf, 512, len) != 0) + return 1; + memcpy(buf, super->buf, len); + free(super->buf); + super->buf = buf; + super->len = len; + } + return 0; } +/* retrieve a pointer to the bbm log which starts after all raid devices */ +struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb) +{ + void *ptr = NULL; + + if (__le32_to_cpu(mpb->bbm_log_size)) { + ptr = mpb; + ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size); + } + + return ptr; +} + +static void __free_imsm(struct intel_super *super, int free_disks); + /* load_imsm_mpb - read matrix metadata * allocates super->mpb to be freed by free_super */ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) { unsigned long long dsize; - size_t len, mpb_size; unsigned long long sectors; struct stat; - struct imsm_super anchor; + struct imsm_super *anchor; __u32 check_sum; + int rc; - memset(super, 0, sizeof(*super)); get_dev_size(fd, NULL, &dsize); if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) { @@ -772,47 +1160,51 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 1; } - len = sizeof(anchor); - if (read(fd, &anchor, len) != len) { + if (posix_memalign((void**)&anchor, 512, 512) != 0) { + if (devname) + fprintf(stderr, + Name ": Failed to allocate imsm anchor buffer" + " on %s\n", devname); + return 1; + } + if (read(fd, anchor, 512) != 512) { if (devname) fprintf(stderr, Name ": Cannot read anchor block on %s: %s\n", devname, strerror(errno)); + free(anchor); return 1; } - if (strncmp((char *) anchor.sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) { + if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) { if (devname) fprintf(stderr, Name ": no IMSM anchor on %s\n", devname); + free(anchor); return 2; } - mpb_size = __le32_to_cpu(anchor.mpb_size); - super->mpb = malloc(mpb_size < 512 ? 512 : mpb_size); - if (!super->mpb) { + __free_imsm(super, 0); + super->len = ROUND_UP(anchor->mpb_size, 512); + if (posix_memalign(&super->buf, 512, super->len) != 0) { if (devname) fprintf(stderr, Name ": unable to allocate %zu byte mpb buffer\n", - mpb_size); + super->len); + free(anchor); return 2; } - memcpy(super->buf, &anchor, sizeof(anchor)); - - /* read the rest of the first block */ - len = 512 - sizeof(anchor); - if (read(fd, super->buf + sizeof(anchor), len) != len) { - if (devname) - fprintf(stderr, - Name ": Cannot read anchor remainder on %s: %s\n", - devname, strerror(errno)); - return 2; + memcpy(super->buf, anchor, 512); + + sectors = mpb_sectors(anchor) - 1; + free(anchor); + if (!sectors) { + rc = load_imsm_disk(fd, super, devname, 0); + if (rc == 0) + rc = parse_raid_devices(super); + return rc; } - sectors = mpb_sectors(&anchor) - 1; - if (!sectors) - return load_imsm_disk(fd, super, devname, 0); - /* read the extended mpb */ if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) { if (devname) @@ -822,8 +1214,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 1; } - len = mpb_size - 512; - if (read(fd, super->buf + 512, len) != len) { + if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) { if (devname) fprintf(stderr, Name ": Cannot read extended mpb on %s: %s\n", @@ -831,43 +1222,71 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 2; } - check_sum = gen_imsm_checksum(super->mpb); - if (check_sum != __le32_to_cpu(super->mpb->check_sum)) { + check_sum = __gen_imsm_checksum(super->anchor); + if (check_sum != __le32_to_cpu(super->anchor->check_sum)) { if (devname) fprintf(stderr, Name ": IMSM checksum %x != %x on %s\n", - check_sum, __le32_to_cpu(super->mpb->check_sum), + check_sum, __le32_to_cpu(super->anchor->check_sum), devname); return 2; } - return load_imsm_disk(fd, super, devname, 0); + /* FIXME the BBM log is disk specific so we cannot use this global + * buffer for all disks. Ok for now since we only look at the global + * bbm_log_size parameter to gate assembly + */ + super->bbm_log = __get_imsm_bbm_log(super->anchor); + + rc = load_imsm_disk(fd, super, devname, 0); + if (rc == 0) + rc = parse_raid_devices(super); + + return rc; } -struct superswitch super_imsm_container; +static void __free_imsm_disk(struct dl *d) +{ + if (d->fd >= 0) + close(d->fd); + if (d->devname) + free(d->devname); + free(d); +} static void free_imsm_disks(struct intel_super *super) { while (super->disks) { struct dl *d = super->disks; super->disks = d->next; - if (d->fd >= 0) - close(d->fd); - if (d->devname) - free(d->devname); - free(d); + __free_imsm_disk(d); } } -static void free_imsm(struct intel_super *super) +/* free all the pieces hanging off of a super pointer */ +static void __free_imsm(struct intel_super *super, int free_disks) { - if (super->mpb) - free(super->mpb); - free_imsm_disks(super); - free(super); + int i; + + if (super->buf) { + free(super->buf); + super->buf = NULL; + } + if (free_disks) + free_imsm_disks(super); + for (i = 0; i < IMSM_MAX_RAID_DEVS; i++) + if (super->dev_tbl[i]) { + free(super->dev_tbl[i]); + super->dev_tbl[i] = NULL; + } } +static void free_imsm(struct intel_super *super) +{ + __free_imsm(super, 1); + free(super); +} static void free_super_imsm(struct supertype *st) { @@ -887,7 +1306,7 @@ static struct intel_super *alloc_super(int creating_imsm) if (super) { memset(super, 0, sizeof(*super)); super->creating_imsm = creating_imsm; - super->creating_dev = -1; + super->current_vol = -1; } return super; @@ -920,7 +1339,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, if (!super) return 1; - /* find the most up to date disk in this array */ + /* find the most up to date disk in this array, skipping spares */ for (sd = sra->devs; sd; sd = sd->next) { sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY); @@ -932,7 +1351,10 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, if (!keep_fd) close(dfd); if (rv == 0) { - gen = __le32_to_cpu(super->mpb->generation_num); + if (super->anchor->num_raid_devs == 0) + gen = 0; + else + gen = __le32_to_cpu(super->anchor->generation_num); if (!best || gen > bestgen) { bestgen = gen; best = sd; @@ -962,10 +1384,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, return 2; } - /* reset the disk list */ - free_imsm_disks(super); - - /* populate disk list */ + /* re-parse the disk list with the current anchor */ for (sd = sra->devs ; sd ; sd = sd->next) { sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY); @@ -978,13 +1397,21 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, close(dfd); } + if (st->subarray[0]) { + if (atoi(st->subarray) <= super->anchor->num_raid_devs) + super->current_vol = atoi(st->subarray); + else + return 1; + } + *sbp = super; + st->container_dev = fd2devnum(fd); if (st->ss == NULL) { - st->ss = &super_imsm_container; + st->ss = &super_imsm; st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; - st->container_dev = fd2devnum(fd); } + st->loaded_container = 1; return 0; } @@ -999,6 +1426,8 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0) return 0; #endif + if (st->subarray[0]) + return 1; /* FIXME */ super = alloc_super(0); if (!super) { @@ -1025,53 +1454,32 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } + st->loaded_container = 0; return 0; } -static int init_zero_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, - char *homehost, int *uuid) +static __u16 info_to_blocks_per_strip(mdu_array_info_t *info) { - st->sb = NULL; - return 0; + if (info->level == 1) + return 128; + return info->chunk_size >> 9; } -static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, - char *homehost, int *uuid) +static __u32 info_to_num_data_stripes(mdu_array_info_t *info) { - /* This is primarily called by Create when creating a new array. - * We will then get add_to_super called for each component, and then - * write_init_super called to write it out to each device. - * For IMSM, Create can create on fresh devices or on a pre-existing - * array. - * To create on a pre-existing array a different method will be called. - * This one is just for fresh drives. - */ - struct intel_super *super; - struct imsm_super *mpb; - size_t mpb_size; + __u32 num_stripes; - super = alloc_super(1); - if (!super) - return 0; - mpb_size = disks_to_mpb_size(info->nr_disks); - mpb = malloc(mpb_size); - if (!mpb) { - free(super); - return 0; - } - memset(mpb, 0, mpb_size); + num_stripes = (info->size * 2) / info_to_blocks_per_strip(info); + if (info->level == 1) + num_stripes /= 2; - memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE)); - memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5, - strlen(MPB_VERSION_RAID5)); - mpb->mpb_size = mpb_size; + return num_stripes; +} - super->mpb = mpb; - st->sb = super; - return 1; +static __u32 info_to_blocks_per_member(mdu_array_info_t *info) +{ + return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1); } static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, @@ -1082,15 +1490,15 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, * so st->sb is already set. */ struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; + struct imsm_super *mpb = super->anchor; struct imsm_dev *dev; struct imsm_vol *vol; struct imsm_map *map; int idx = mpb->num_raid_devs; int i; unsigned long long array_blocks; - unsigned long long sz; __u32 offset = 0; + size_t size_old, size_new; if (mpb->num_raid_devs >= 2) { fprintf(stderr, Name": This imsm-container already has the " @@ -1098,9 +1506,37 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, return 0; } - super->creating_dev = idx; - mpb->num_raid_devs++; - dev = get_imsm_dev(mpb, idx); + /* ensure the mpb is large enough for the new data */ + size_old = __le32_to_cpu(mpb->mpb_size); + size_new = disks_to_mpb_size(info->nr_disks); + if (size_new > size_old) { + void *mpb_new; + size_t size_round = ROUND_UP(size_new, 512); + + if (posix_memalign(&mpb_new, 512, size_round) != 0) { + fprintf(stderr, Name": could not allocate new mpb\n"); + return 0; + } + memcpy(mpb_new, mpb, size_old); + free(mpb); + mpb = mpb_new; + super->anchor = mpb_new; + mpb->mpb_size = __cpu_to_le32(size_new); + memset(mpb_new + size_old, 0, size_round - size_old); + } + super->current_vol = idx; + /* when creating the first raid device in this container set num_disks + * to zero, i.e. delete this spare and add raid member devices in + * add_to_super_imsm_volume() + */ + if (super->current_vol == 0) + mpb->num_disks = 0; + sprintf(st->subarray, "%d", idx); + dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1)); + if (!dev) { + fprintf(stderr, Name": could not allocate raid device\n"); + return 0; + } strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, @@ -1114,45 +1550,144 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol->migr_type = 0; vol->dirty = 0; for (i = 0; i < idx; i++) { - struct imsm_dev *prev = get_imsm_dev(mpb, i); - struct imsm_map *pmap = &prev->vol.map[0]; + struct imsm_dev *prev = get_imsm_dev(super, i); + struct imsm_map *pmap = get_imsm_map(prev, 0); offset += __le32_to_cpu(pmap->blocks_per_member); offset += IMSM_RESERVED_SECTORS; } - map = &vol->map[0]; + map = get_imsm_map(dev, 0); map->pba_of_lba0 = __cpu_to_le32(offset); - sz = info->size * 2; - map->blocks_per_member = __cpu_to_le32(sz); - map->blocks_per_strip = __cpu_to_le16(info->chunk_size >> 9); - map->num_data_stripes = __cpu_to_le32(sz / (info->chunk_size >> 9)); + map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); + map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); + map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info)); map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL; + + if (info->level == 1 && info->raid_disks > 2) { + fprintf(stderr, Name": imsm does not support more than 2 disks" + "in a raid1 volume\n"); + return 0; + } if (info->level == 10) map->raid_level = 1; else map->raid_level = info->level; + map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { /* initialized in add_to_super */ - map->disk_ord_tbl[i] = __cpu_to_le32(0); + set_imsm_ord_tbl_ent(map, i, 0); + } + mpb->num_raid_devs++; + super->dev_tbl[super->current_vol] = dev; + + return 1; +} + +static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, + unsigned long long size, char *name, + char *homehost, int *uuid) +{ + /* This is primarily called by Create when creating a new array. + * We will then get add_to_super called for each component, and then + * write_init_super called to write it out to each device. + * For IMSM, Create can create on fresh devices or on a pre-existing + * array. + * To create on a pre-existing array a different method will be called. + * This one is just for fresh drives. + */ + struct intel_super *super; + struct imsm_super *mpb; + size_t mpb_size; + + if (!info) { + st->sb = NULL; + return 0; + } + if (st->sb) + return init_super_imsm_volume(st, info, size, name, homehost, + uuid); + + super = alloc_super(1); + if (!super) + return 0; + mpb_size = disks_to_mpb_size(info->nr_disks); + if (posix_memalign(&super->buf, 512, mpb_size) != 0) { + free(super); + return 0; } + mpb = super->buf; + memset(mpb, 0, mpb_size); + + memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE)); + memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5, + strlen(MPB_VERSION_RAID5)); + mpb->mpb_size = mpb_size; + st->sb = super; return 1; } +#ifndef MDASSEMBLE +static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, + int fd, char *devname) +{ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + struct dl *dl; + struct imsm_dev *dev; + struct imsm_map *map; + __u32 status; + + dev = get_imsm_dev(super, super->current_vol); + map = get_imsm_map(dev, 0); + + for (dl = super->disks; dl ; dl = dl->next) + if (dl->major == dk->major && + dl->minor == dk->minor) + break; + + if (!dl || ! (dk->state & (1<index < 0) { + dl->index = super->anchor->num_disks; + super->anchor->num_disks++; + } + set_imsm_ord_tbl_ent(map, dk->number, dl->index); + status = CONFIGURED_DISK | USABLE_DISK; + dl->disk.status = __cpu_to_le32(status); + + /* if we are creating the first raid device update the family number */ + if (super->current_vol == 0) { + __u32 sum; + struct imsm_dev *_dev = __get_imsm_dev(mpb, 0); + struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index); + + *_dev = *dev; + *_disk = dl->disk; + sum = __gen_imsm_checksum(mpb); + mpb->family_num = __cpu_to_le32(sum); + } +} + static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_disk *disk; struct dl *dd; unsigned long long size; __u32 status, id; int rv; struct stat stb; + if (super->current_vol >= 0) { + add_to_super_imsm_volume(st, dk, fd, devname); + return; + } + fstat(fd, &stb); dd = malloc(sizeof(*dd)); if (!dd) { @@ -1163,115 +1698,217 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, memset(dd, 0, sizeof(*dd)); dd->major = major(stb.st_rdev); dd->minor = minor(stb.st_rdev); - dd->index = dk->number; + dd->index = -1; dd->devname = devname ? strdup(devname) : NULL; - dd->next = super->disks; dd->fd = fd; rv = imsm_read_serial(fd, devname, dd->serial); if (rv) { fprintf(stderr, - Name ": failed to retrieve scsi serial " - "using \'%s\' instead\n", devname); - strcpy((char *) dd->serial, devname); + Name ": failed to retrieve scsi serial, aborting\n"); + free(dd); + abort(); } - if (mpb->num_disks <= dk->number) - mpb->num_disks = dk->number + 1; - - disk = get_imsm_disk(mpb, dk->number); get_dev_size(fd, NULL, &size); size /= 512; status = USABLE_DISK | SPARE_DISK; - strcpy((char *) disk->serial, (char *) dd->serial); - disk->total_blocks = __cpu_to_le32(size); - disk->status = __cpu_to_le32(status); + serialcpy(dd->disk.serial, dd->serial); + dd->disk.total_blocks = __cpu_to_le32(size); + dd->disk.status = __cpu_to_le32(status); if (sysfs_disk_to_scsi_id(fd, &id) == 0) - disk->scsi_id = __cpu_to_le32(id); + dd->disk.scsi_id = __cpu_to_le32(id); else - disk->scsi_id = __cpu_to_le32(0); - - /* update the family number if we are creating a container */ - if (super->creating_imsm) - mpb->family_num = __cpu_to_le32(gen_imsm_checksum(mpb)); - - super->disks = dd; + dd->disk.scsi_id = __cpu_to_le32(0); + + if (st->update_tail) { + dd->next = super->add; + super->add = dd; + } else { + dd->next = super->disks; + super->disks = dd; + } } -static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, - int fd, char *devname) +static int store_imsm_mpb(int fd, struct intel_super *super); + +/* spare records have their own family number and do not have any defined raid + * devices + */ +static int write_super_imsm_spares(struct intel_super *super, int doclose) { - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct dl *dl; - struct imsm_dev *dev; - struct imsm_map *map; - struct imsm_disk *disk; - __u32 status; + struct imsm_super mpb_save; + struct imsm_super *mpb = super->anchor; + __u32 sum; + struct dl *d; - if (super->creating_dev == -1) { - fprintf(stderr, Name ": no active raid device\n"); - abort(); - } + mpb_save = *mpb; + mpb->num_raid_devs = 0; + mpb->num_disks = 1; + mpb->mpb_size = sizeof(struct imsm_super); + mpb->generation_num = __cpu_to_le32(1UL); - dev = get_imsm_dev(mpb, super->creating_dev); - map = &dev->vol.map[0]; + for (d = super->disks; d; d = d->next) { + if (d->index != -1) + continue; - for (dl = super->disks; dl ; dl = dl->next) - if (dl->major == dk->major && - dl->minor == dk->minor) - break; - if (!dl || ! (dk->state & (1<disk[0] = d->disk; + sum = __gen_imsm_checksum(mpb); + mpb->family_num = __cpu_to_le32(sum); + sum = __gen_imsm_checksum(mpb); + mpb->check_sum = __cpu_to_le32(sum); - map->disk_ord_tbl[dk->number] = __cpu_to_le32(dl->index); + if (store_imsm_mpb(d->fd, super)) { + fprintf(stderr, "%s: failed for device %d:%d %s\n", + __func__, d->major, d->minor, strerror(errno)); + *mpb = mpb_save; + return 1; + } + if (doclose) { + close(d->fd); + d->fd = -1; + } + } - disk = get_imsm_disk(mpb, dl->index); - status = CONFIGURED_DISK | USABLE_DISK; - disk->status = __cpu_to_le32(status); + *mpb = mpb_save; + return 0; } -static int store_imsm_mpb(int fd, struct intel_super *super); - static int write_super_imsm(struct intel_super *super, int doclose) { - struct imsm_super *mpb = super->mpb; + struct imsm_super *mpb = super->anchor; struct dl *d; __u32 generation; __u32 sum; + int spares = 0; + int i; + __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); /* 'generation' is incremented everytime the metadata is written */ generation = __le32_to_cpu(mpb->generation_num); generation++; mpb->generation_num = __cpu_to_le32(generation); + for (d = super->disks; d; d = d->next) { + if (d->index == -1) + spares++; + else { + mpb->disk[d->index] = d->disk; + mpb_size += sizeof(struct imsm_disk); + } + } + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = __get_imsm_dev(mpb, i); + + imsm_copy_dev(dev, super->dev_tbl[i]); + mpb_size += sizeof_imsm_dev(dev, 0); + } + mpb_size += __le32_to_cpu(mpb->bbm_log_size); + mpb->mpb_size = __cpu_to_le32(mpb_size); + /* recalculate checksum */ - sum = gen_imsm_checksum(mpb); + sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); + /* write the mpb for disks that compose raid devices */ for (d = super->disks; d ; d = d->next) { - if (store_imsm_mpb(d->fd, super)) { + if (d->index < 0) + continue; + if (store_imsm_mpb(d->fd, super)) fprintf(stderr, "%s: failed for device %d:%d %s\n", __func__, d->major, d->minor, strerror(errno)); - return 0; - } if (doclose) { close(d->fd); d->fd = -1; } } - return 1; + if (spares) + return write_super_imsm_spares(super, doclose); + + return 0; +} + + +static int create_array(struct supertype *st) +{ + size_t len; + struct imsm_update_create_array *u; + struct intel_super *super = st->sb; + struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + + len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0); + u = malloc(len); + if (!u) { + fprintf(stderr, "%s: failed to allocate update buffer\n", + __func__); + return 1; + } + + u->type = update_create_array; + u->dev_idx = super->current_vol; + imsm_copy_dev(&u->dev, dev); + append_metadata_update(st, u, len); + + return 0; +} + +static int _add_disk(struct supertype *st) +{ + struct intel_super *super = st->sb; + size_t len; + struct imsm_update_add_disk *u; + + if (!super->add) + return 0; + + len = sizeof(*u); + u = malloc(len); + if (!u) { + fprintf(stderr, "%s: failed to allocate update buffer\n", + __func__); + return 1; + } + + u->type = update_add_disk; + append_metadata_update(st, u, len); + + return 0; } static int write_init_super_imsm(struct supertype *st) { - return write_super_imsm(st->sb, 1); + if (st->update_tail) { + /* queue the recently created array / added disk + * as a metadata update */ + struct intel_super *super = st->sb; + struct dl *d; + int rv; + + /* determine if we are creating a volume or adding a disk */ + if (super->current_vol < 0) { + /* in the add disk case we are running in mdmon + * context, so don't close fd's + */ + return _add_disk(st); + } else + rv = create_array(st); + + for (d = super->disks; d ; d = d->next) { + close(d->fd); + d->fd = -1; + } + + return rv; + } else + return write_super_imsm(st->sb, 1); } +#endif static int store_zero_imsm(struct supertype *st, int fd) { unsigned long long dsize; - char buf[512]; + void *buf; get_dev_size(fd, NULL, &dsize); @@ -1279,163 +1916,47 @@ static int store_zero_imsm(struct supertype *st, int fd) if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) return 1; - memset(buf, 0, sizeof(buf)); - if (write(fd, buf, sizeof(buf)) != sizeof(buf)) + if (posix_memalign(&buf, 512, 512) != 0) return 1; + memset(buf, 0, 512); + if (write(fd, buf, 512) != 512) + return 1; return 0; } -static void getinfo_super_n_imsm_container(struct supertype *st, struct mdinfo *info) +static int imsm_bbm_log_size(struct imsm_super *mpb) { - /* just need offset and size... - * of the metadata - */ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_disk *disk = get_imsm_disk(mpb, info->disk.number); - - info->data_offset = __le32_to_cpu(disk->total_blocks) - - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); - info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + return __le32_to_cpu(mpb->bbm_log_size); } -static void getinfo_super_n_imsm_volume(struct supertype *st, struct mdinfo *info) +#ifndef MDASSEMBLE +static int validate_geometry_imsm_container(struct supertype *st, int level, + int layout, int raiddisks, int chunk, + unsigned long long size, char *dev, + unsigned long long *freesize, + int verbose) { - /* Find the particular details for info->disk.raid_disk. - * This includes data_offset, component_size, - */ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_dev *dev = get_imsm_dev(mpb, super->creating_dev); - struct imsm_map *map = &dev->vol.map[0]; - - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); -} + int fd; + unsigned long long ldsize; -static int validate_geometry_imsm(struct supertype *st, int level, int layout, - int raiddisks, int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) -{ - int fd, cfd; - struct mdinfo *sra; + if (level != LEVEL_CONTAINER) + return 0; + if (!dev) + return 1; - /* if given unused devices create a container - * if given given devices in a container create a member volume - */ - if (level == LEVEL_CONTAINER) { - st->ss = &super_imsm_container; - if (dev) { - /* validate the container, dev == NULL */ - int rv = st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; - } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); + fd = open(dev, O_RDONLY|O_EXCL, 0); + if (fd < 0) { + if (verbose) + fprintf(stderr, Name ": imsm: Cannot open %s: %s\n", + dev, strerror(errno)); + return 0; } - - if (st->sb) { - /* creating in a given container */ - st->ss = &super_imsm_volume; - if (dev) { - int rv = st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; - } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); + if (!get_dev_size(fd, dev, &ldsize)) { + close(fd); + return 0; } - - /* limit creation to the following levels */ - if (!dev) - switch (level) { - case 0: - case 1: - case 10: - case 5: - break; - default: - return 1; - } - - /* This device needs to be a device in an 'imsm' container */ - fd = open(dev, O_RDONLY|O_EXCL, 0); - if (fd >= 0) { - fprintf(stderr, - Name ": Cannot create this array on device %s\n", - dev); - close(fd); - return 0; - } - if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); - return 0; - } - /* Well, it is in use by someone, maybe an 'imsm' container. */ - cfd = open_container(fd); - if (cfd < 0) { - close(fd); - fprintf(stderr, Name ": Cannot use %s: It is busy\n", - dev); - return 0; - } - sra = sysfs_read(cfd, 0, GET_VERSION); - close(fd); - if (sra && sra->array.major_version == -1 && - strcmp(sra->text_version, "imsm") == 0) { - /* This is a member of a imsm container. Load the container - * and try to create a volume - */ - struct intel_super *super; - st->ss = &super_imsm_volume; - if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) { - st->sb = super; - st->container_dev = fd2devnum(cfd); - close(cfd); - return st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, size, - dev, freesize); - } - close(cfd); - } else /* may belong to another container */ - return 0; - - return 1; -} - -static int validate_geometry_imsm_container(struct supertype *st, int level, - int layout, int raiddisks, int chunk, - unsigned long long size, char *dev, - unsigned long long *freesize) -{ - int fd; - unsigned long long ldsize; - - if (level != LEVEL_CONTAINER) - return 0; - if (!dev) - return 1; - - fd = open(dev, O_RDONLY|O_EXCL, 0); - if (fd < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); - return 0; - } - if (!get_dev_size(fd, dev, &ldsize)) { - close(fd); - return 0; - } - close(fd); + close(fd); *freesize = avail_size_imsm(st, ldsize >> 9); @@ -1448,7 +1969,8 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, static int validate_geometry_imsm_volume(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, - unsigned long long *freesize) + unsigned long long *freesize, + int verbose) { struct stat stb; struct intel_super *super = st->sb; @@ -1462,8 +1984,9 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 0; if (level == 1 && raiddisks > 2) { - fprintf(stderr, Name ": imsm does not support more than 2 " - "in a raid1 configuration\n"); + if (verbose) + fprintf(stderr, Name ": imsm does not support more " + "than 2 in a raid1 configuration\n"); return 0; } @@ -1473,15 +1996,18 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, if (!dev) { /* General test: make sure there is space for - * 'raiddisks' device extents of size 'size'. + * 'raiddisks' device extents of size 'size' at a given + * offset */ unsigned long long minsize = size*2 /* convert to blocks */; + unsigned long long start_offset = ~0ULL; int dcnt = 0; if (minsize == 0) minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; for (dl = super->disks; dl ; dl = dl->next) { int found = 0; + pos = 0; i = 0; e = get_extents(super, dl); if (!e) continue; @@ -1490,6 +2016,13 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, esize = e[i].start - pos; if (esize >= minsize) found = 1; + if (found && start_offset == ~0ULL) { + start_offset = pos; + break; + } else if (found && pos != start_offset) { + found = 0; + break; + } pos = e[i].start + e[i].size; i++; } while (e[i-1].size); @@ -1498,9 +2031,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, free(e); } if (dcnt < raiddisks) { - fprintf(stderr, Name ": Not enough devices with space " - "for this array (%d < %d)\n", - dcnt, raiddisks); + if (verbose) + fprintf(stderr, Name ": imsm: Not enough " + "devices with space for this array " + "(%d < %d)\n", + dcnt, raiddisks); return 0; } return 1; @@ -1516,8 +2051,9 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, break; } if (!dl) { - fprintf(stderr, Name ": %s is not in the same imsm set\n", - dev); + if (verbose) + fprintf(stderr, Name ": %s is not in the " + "same imsm set\n", dev); return 0; } e = get_extents(super, dl); @@ -1536,6 +2072,95 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 1; } +static int validate_geometry_imsm(struct supertype *st, int level, int layout, + int raiddisks, int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose) +{ + int fd, cfd; + struct mdinfo *sra; + + /* if given unused devices create a container + * if given given devices in a container create a member volume + */ + if (level == LEVEL_CONTAINER) { + /* Must be a fresh device to add to a container */ + return validate_geometry_imsm_container(st, level, layout, + raiddisks, chunk, size, + dev, freesize, + verbose); + } + + if (st->sb) { + /* creating in a given container */ + return validate_geometry_imsm_volume(st, level, layout, + raiddisks, chunk, size, + dev, freesize, verbose); + } + + /* limit creation to the following levels */ + if (!dev) + switch (level) { + case 0: + case 1: + case 10: + case 5: + break; + default: + return 1; + } + + /* This device needs to be a device in an 'imsm' container */ + fd = open(dev, O_RDONLY|O_EXCL, 0); + if (fd >= 0) { + if (verbose) + fprintf(stderr, + Name ": Cannot create this array on device %s\n", + dev); + close(fd); + return 0; + } + if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { + if (verbose) + fprintf(stderr, Name ": Cannot open %s: %s\n", + dev, strerror(errno)); + return 0; + } + /* Well, it is in use by someone, maybe an 'imsm' container. */ + cfd = open_container(fd); + if (cfd < 0) { + close(fd); + if (verbose) + fprintf(stderr, Name ": Cannot use %s: It is busy\n", + dev); + return 0; + } + sra = sysfs_read(cfd, 0, GET_VERSION); + close(fd); + if (sra && sra->array.major_version == -1 && + strcmp(sra->text_version, "imsm") == 0) { + /* This is a member of a imsm container. Load the container + * and try to create a volume + */ + struct intel_super *super; + + if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) { + st->sb = super; + st->container_dev = fd2devnum(cfd); + close(cfd); + return validate_geometry_imsm_volume(st, level, layout, + raiddisks, chunk, + size, dev, + freesize, verbose); + } + close(cfd); + } else /* may belong to another container */ + return 0; + + return 1; +} +#endif /* MDASSEMBLE */ + static struct mdinfo *container_content_imsm(struct supertype *st) { /* Given a container loaded by load_super_imsm_all, @@ -1547,87 +2172,82 @@ static struct mdinfo *container_content_imsm(struct supertype *st) * and create appropriate device mdinfo. */ struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; + struct imsm_super *mpb = super->anchor; struct mdinfo *rest = NULL; int i; + /* do not assemble arrays that might have bad blocks */ + if (imsm_bbm_log_size(super->anchor)) { + fprintf(stderr, Name ": BBM log found in metadata. " + "Cannot activate array(s).\n"); + return NULL; + } + for (i = 0; i < mpb->num_raid_devs; i++) { - struct imsm_dev *dev = get_imsm_dev(mpb, i); - struct imsm_vol *vol = &dev->vol; - struct imsm_map *map = vol->map; + struct imsm_dev *dev = get_imsm_dev(super, i); + struct imsm_map *map = get_imsm_map(dev, 0); struct mdinfo *this; - __u64 sz; int slot; this = malloc(sizeof(*this)); memset(this, 0, sizeof(*this)); this->next = rest; - rest = this; - - this->array.major_version = 2000; - get_imsm_numerical_version(mpb, &this->array.minor_version, - &this->array.patch_version); - this->array.level = get_imsm_raid_level(map); - this->array.raid_disks = map->num_members; - this->array.layout = imsm_level_to_layout(this->array.level); - this->array.md_minor = -1; - this->array.ctime = 0; - this->array.utime = 0; - this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; - this->array.state = !vol->dirty; - this->container_member = i; - if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) - this->resync_start = 0; - else - this->resync_start = ~0ULL; - - strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN); - this->name[MAX_RAID_SERIAL_LEN] = 0; - - sprintf(this->text_version, "/%s/%d", - devnum2devname(st->container_dev), - this->container_member); - - memset(this->uuid, 0, sizeof(this->uuid)); - - sz = __le32_to_cpu(dev->size_high); - sz <<= 32; - sz += __le32_to_cpu(dev->size_low); - this->component_size = sz; - this->array.size = this->component_size / 2; + super->current_vol = i; + getinfo_super_imsm_volume(st, this); for (slot = 0 ; slot < map->num_members; slot++) { - struct imsm_disk *disk; struct mdinfo *info_d; struct dl *d; int idx; + int skip; __u32 s; + __u32 ord; - idx = __le32_to_cpu(map->disk_ord_tbl[slot] & ~(0xff << 24)); + skip = 0; + idx = get_imsm_disk_idx(dev, slot); + ord = get_imsm_ord_tbl_ent(dev, slot); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; if (d == NULL) - break; /* shouldn't this be continue ?? */ + skip = 1; + + s = d ? __le32_to_cpu(d->disk.status) : 0; + if (s & FAILED_DISK) + skip = 1; + if (!(s & USABLE_DISK)) + skip = 1; + if (ord & IMSM_ORD_REBUILD) + skip = 1; + + /* + * if we skip some disks the array will be assmebled degraded; + * reset resync start to avoid a dirty-degraded situation + * + * FIXME handle dirty degraded + */ + if (skip && !dev->vol.dirty) + this->resync_start = ~0ULL; + if (skip) + continue; info_d = malloc(sizeof(*info_d)); - if (!info_d) - break; /* ditto ?? */ + if (!info_d) { + fprintf(stderr, Name ": failed to allocate disk" + " for volume %s\n", (char *) dev->volume); + free(this); + this = rest; + break; + } memset(info_d, 0, sizeof(*info_d)); info_d->next = this->devs; this->devs = info_d; - disk = get_imsm_disk(mpb, idx); - s = __le32_to_cpu(disk->status); - info_d->disk.number = d->index; info_d->disk.major = d->major; info_d->disk.minor = d->minor; info_d->disk.raid_disk = slot; - info_d->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; - info_d->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; - info_d->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0; this->array.working_disks++; @@ -1637,27 +2257,38 @@ static struct mdinfo *container_content_imsm(struct supertype *st) if (d->devname) strcpy(info_d->name, d->devname); } + rest = this; } return rest; } +#ifndef MDASSEMBLE static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->anchor; + + if (atoi(inst) >= mpb->num_raid_devs) { + fprintf(stderr, "%s: subarry index %d, out of range\n", + __func__, atoi(inst)); + return -ENODEV; + } + dprintf("imsm: open_new %s\n", inst); a->info.container_member = atoi(inst); return 0; } -static __u8 imsm_check_degraded(struct imsm_super *mpb, int n, int failed) +static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed) { - struct imsm_dev *dev = get_imsm_dev(mpb, n); - struct imsm_map *map = dev->vol.map; + struct imsm_map *map = get_imsm_map(dev, 0); if (!failed) - return map->map_state; + return map->map_state == IMSM_T_STATE_UNINITIALIZED ? + IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL; switch (get_imsm_raid_level(map)) { case 0: @@ -1678,22 +2309,24 @@ static __u8 imsm_check_degraded(struct imsm_super *mpb, int n, int failed) int device_per_mirror = 2; /* FIXME is this always the case? * and are they always adjacent? */ - int failed = 0; + int r10fail = 0; int i; for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(map, i); - struct imsm_disk *disk = get_imsm_disk(mpb, idx); + int idx = get_imsm_disk_idx(dev, i); + struct imsm_disk *disk = get_imsm_disk(super, idx); - if (__le32_to_cpu(disk->status) & FAILED_DISK) - failed++; + if (!disk) + r10fail++; + else if (__le32_to_cpu(disk->status) & FAILED_DISK) + r10fail++; - if (failed >= device_per_mirror) + if (r10fail >= device_per_mirror) return IMSM_T_STATE_FAILED; - /* reset 'failed' for next mirror set */ + /* reset 'r10fail' for next mirror set */ if (!((i + 1) % device_per_mirror)) - failed = 0; + r10fail = 0; } return IMSM_T_STATE_DEGRADED; @@ -1711,65 +2344,126 @@ static __u8 imsm_check_degraded(struct imsm_super *mpb, int n, int failed) return map->map_state; } -static int imsm_count_failed(struct imsm_super *mpb, struct imsm_map *map) +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) { int i; int failed = 0; struct imsm_disk *disk; + struct imsm_map *map = get_imsm_map(dev, 0); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(map, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i); + int idx = ord_to_idx(ord); - disk = get_imsm_disk(mpb, idx); - if (__le32_to_cpu(disk->status) & FAILED_DISK) + disk = get_imsm_disk(super, idx); + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK || + ord & IMSM_ORD_REBUILD) failed++; } return failed; } -static void imsm_set_array_state(struct active_array *a, int consistent) +static int is_resyncing(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 1; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_NORMAL) + return 1; + else + return 0; +} + +static int is_rebuilding(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (dev->vol.migr_type == 0) + return 0; + + migr_map = get_imsm_map(dev, 1); + + if (migr_map->map_state == IMSM_T_STATE_DEGRADED) + return 1; + else + return 0; +} + +/* Handle dirty -> clean transititions and resync. Degraded and rebuild + * states are handled in imsm_set_disk() with one exception, when a + * resync is stopped due to a new failure this routine will set the + * 'degraded' state for the array. + */ +static int imsm_set_array_state(struct active_array *a, int consistent) { int inst = a->info.container_member; struct intel_super *super = a->container->sb; - struct imsm_dev *dev = get_imsm_dev(super->mpb, inst); - struct imsm_map *map = &dev->vol.map[0]; - int dirty = !consistent; - int failed; - __u8 map_state; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, 0); + int failed = imsm_count_failed(super, dev); + __u8 map_state = imsm_check_degraded(super, dev, failed); + + if (consistent == 2 && + (a->resync_start != ~0ULL || + map_state != IMSM_T_STATE_NORMAL || + dev->vol.migr_state)) + consistent = 0; if (a->resync_start == ~0ULL) { - failed = imsm_count_failed(super->mpb, map); - map_state = imsm_check_degraded(super->mpb, inst, failed); - if (!failed) - map_state = IMSM_T_STATE_NORMAL; - if (map->map_state != map_state) { - dprintf("imsm: map_state %d: %d\n", - inst, map_state); + /* complete intialization / resync, + * recovery is completed in ->set_disk + */ + if (is_resyncing(dev)) { + dprintf("imsm: mark resync done\n"); + dev->vol.migr_state = 0; map->map_state = map_state; super->updates_pending++; } + } else if (!is_resyncing(dev) && !failed) { + /* mark the start of the init process if nothing is failed */ + dprintf("imsm: mark resync start (%llu)\n", a->resync_start); + map->map_state = map_state; + migrate(dev, IMSM_T_STATE_NORMAL, + map->map_state == IMSM_T_STATE_NORMAL); + super->updates_pending++; } - if (dev->vol.dirty != dirty) { + /* mark dirty / clean */ + if (dev->vol.dirty != !consistent) { dprintf("imsm: mark '%s' (%llu)\n", - dirty?"dirty":"clean", a->resync_start); - - dev->vol.dirty = dirty; + consistent ? "clean" : "dirty", a->resync_start); + if (consistent) + dev->vol.dirty = 0; + else + dev->vol.dirty = 1; super->updates_pending++; } + return consistent; } static void imsm_set_disk(struct active_array *a, int n, int state) { int inst = a->info.container_member; struct intel_super *super = a->container->sb; - struct imsm_dev *dev = get_imsm_dev(super->mpb, inst); - struct imsm_map *map = dev->vol.map; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, 0); struct imsm_disk *disk; + int failed; __u32 status; - int failed = 0; - int new_failure = 0; + __u32 ord; + __u8 map_state; if (n > map->num_members) fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n", @@ -1780,33 +2474,52 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - disk = get_imsm_disk(super->mpb, get_imsm_disk_idx(map, n)); + ord = get_imsm_ord_tbl_ent(dev, n); + disk = get_imsm_disk(super, ord_to_idx(ord)); - /* check if we have seen this failure before */ + /* check for new failures */ status = __le32_to_cpu(disk->status); if ((state & DS_FAULTY) && !(status & FAILED_DISK)) { status |= FAILED_DISK; disk->status = __cpu_to_le32(status); - new_failure = 1; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); + super->updates_pending++; } + /* check if in_sync */ + if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) { + struct imsm_map *migr_map = get_imsm_map(dev, 1); - /** - * the number of failures have changed, count up 'failed' to determine - * degraded / failed status - */ - if (new_failure && map->map_state != IMSM_T_STATE_FAILED) - failed = imsm_count_failed(super->mpb, map); + set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); + super->updates_pending++; + } - if (failed) - map->map_state = imsm_check_degraded(super->mpb, inst, failed); + failed = imsm_count_failed(super, dev); + map_state = imsm_check_degraded(super, dev, failed); - if (new_failure) + /* check if recovery complete, newly degraded, or failed */ + if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { + map->map_state = map_state; + dev->vol.migr_state = 0; + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_DEGRADED && + map->map_state != map_state && + !dev->vol.migr_state) { + dprintf("imsm: mark degraded\n"); + map->map_state = map_state; + super->updates_pending++; + } else if (map_state == IMSM_T_STATE_FAILED && + map->map_state != map_state) { + dprintf("imsm: mark failed\n"); + dev->vol.migr_state = 0; + map->map_state = map_state; super->updates_pending++; + } } static int store_imsm_mpb(int fd, struct intel_super *super) { - struct imsm_super *mpb = super->mpb; + struct imsm_super *mpb = super->anchor; __u32 mpb_size = __le32_to_cpu(mpb->mpb_size); unsigned long long dsize; unsigned long long sectors; @@ -1821,7 +2534,7 @@ static int store_imsm_mpb(int fd, struct intel_super *super) if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) return 1; - if (write(fd, super->buf + 512, mpb_size - 512) != mpb_size - 512) + if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors) return 1; } @@ -1832,8 +2545,6 @@ static int store_imsm_mpb(int fd, struct intel_super *super) if (write(fd, super->buf, 512) != 512) return 1; - fsync(fd); - return 0; } @@ -1849,12 +2560,587 @@ static void imsm_sync_metadata(struct supertype *container) super->updates_pending = 0; } +static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) +{ + struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); + int i = get_imsm_disk_idx(dev, idx); + struct dl *dl; + + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == i) + break; + + if (dl && __le32_to_cpu(dl->disk.status) & FAILED_DISK) + dl = NULL; + + if (dl) + dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor); + + return dl; +} + +static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a) +{ + struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); + int idx = get_imsm_disk_idx(dev, slot); + struct imsm_map *map = get_imsm_map(dev, 0); + unsigned long long esize; + unsigned long long pos; + struct mdinfo *d; + struct extent *ex; + int j; + int found; + __u32 array_start; + __u32 status; + struct dl *dl; + + for (dl = super->disks; dl; dl = dl->next) { + /* If in this array, skip */ + for (d = a->info.devs ; d ; d = d->next) + if (d->state_fd >= 0 && + d->disk.major == dl->major && + d->disk.minor == dl->minor) { + dprintf("%x:%x already in array\n", dl->major, dl->minor); + break; + } + if (d) + continue; + + /* skip in use or failed drives */ + status = __le32_to_cpu(dl->disk.status); + if (status & FAILED_DISK || idx == dl->index) { + dprintf("%x:%x status ( %s%s)\n", + dl->major, dl->minor, + status & FAILED_DISK ? "failed " : "", + idx == dl->index ? "in use " : ""); + continue; + } + + /* Does this unused device have the requisite free space? + * We need a->info.component_size sectors + */ + ex = get_extents(super, dl); + if (!ex) { + dprintf("cannot get extents\n"); + continue; + } + found = 0; + j = 0; + pos = 0; + array_start = __le32_to_cpu(map->pba_of_lba0); + + do { + /* check that we can start at pba_of_lba0 with + * a->info.component_size of space + */ + esize = ex[j].start - pos; + if (array_start >= pos && + array_start + a->info.component_size < ex[j].start) { + found = 1; + break; + } + pos = ex[j].start + ex[j].size; + j++; + + } while (ex[j-1].size); + + free(ex); + if (!found) { + dprintf("%x:%x does not have %llu at %d\n", + dl->major, dl->minor, + a->info.component_size, + __le32_to_cpu(map->pba_of_lba0)); + /* No room */ + continue; + } else + break; + } + + return dl; +} + +static struct mdinfo *imsm_activate_spare(struct active_array *a, + struct metadata_update **updates) +{ + /** + * Find a device with unused free space and use it to replace a + * failed/vacant region in an array. We replace failed regions one a + * array at a time. The result is that a new spare disk will be added + * to the first failed array and after the monitor has finished + * propagating failures the remainder will be consumed. + * + * FIXME add a capability for mdmon to request spares from another + * container. + */ + + struct intel_super *super = a->container->sb; + int inst = a->info.container_member; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, 0); + int failed = a->info.array.raid_disks; + struct mdinfo *rv = NULL; + struct mdinfo *d; + struct mdinfo *di; + struct metadata_update *mu; + struct dl *dl; + struct imsm_update_activate_spare *u; + int num_spares = 0; + int i; + + for (d = a->info.devs ; d ; d = d->next) { + if ((d->curr_state & DS_FAULTY) && + d->state_fd >= 0) + /* wait for Removal to happen */ + return NULL; + if (d->state_fd >= 0) + failed--; + } + + dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", + inst, failed, a->info.array.raid_disks, a->info.array.level); + if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) + return NULL; + + /* For each slot, if it is not working, find a spare */ + for (i = 0; i < a->info.array.raid_disks; i++) { + for (d = a->info.devs ; d ; d = d->next) + if (d->disk.raid_disk == i) + break; + dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0); + if (d && (d->state_fd >= 0)) + continue; + + /* + * OK, this device needs recovery. Try to re-add the previous + * occupant of this slot, if this fails add a new spare + */ + dl = imsm_readd(super, i, a); + if (!dl) + dl = imsm_add_spare(super, i, a); + if (!dl) + continue; + + /* found a usable disk with enough space */ + di = malloc(sizeof(*di)); + memset(di, 0, sizeof(*di)); + + /* dl->index will be -1 in the case we are activating a + * pristine spare. imsm_process_update() will create a + * new index in this case. Once a disk is found to be + * failed in all member arrays it is kicked from the + * metadata + */ + di->disk.number = dl->index; + + /* (ab)use di->devs to store a pointer to the device + * we chose + */ + di->devs = (struct mdinfo *) dl; + + di->disk.raid_disk = i; + di->disk.major = dl->major; + di->disk.minor = dl->minor; + di->disk.state = 0; + di->data_offset = __le32_to_cpu(map->pba_of_lba0); + di->component_size = a->info.component_size; + di->container_member = inst; + di->next = rv; + rv = di; + num_spares++; + dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, + i, di->data_offset); + + break; + } + + if (!rv) + /* No spares found */ + return rv; + /* Now 'rv' has a list of devices to return. + * Create a metadata_update record to update the + * disk_ord_tbl for the array + */ + mu = malloc(sizeof(*mu)); + mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares); + mu->space = NULL; + mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; + mu->next = *updates; + u = (struct imsm_update_activate_spare *) mu->buf; + + for (di = rv ; di ; di = di->next) { + u->type = update_activate_spare; + u->dl = (struct dl *) di->devs; + di->devs = NULL; + u->slot = di->disk.raid_disk; + u->array = inst; + u->next = u + 1; + u++; + } + (u-1)->next = NULL; + *updates = mu; + + return rv; +} + +static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2) +{ + struct imsm_map *m1 = get_imsm_map(d1, 0); + struct imsm_map *m2 = get_imsm_map(d2, 0); + int i; + int j; + int idx; + + for (i = 0; i < m1->num_members; i++) { + idx = get_imsm_disk_idx(d1, i); + for (j = 0; j < m2->num_members; j++) + if (idx == get_imsm_disk_idx(d2, j)) + return 1; + } + + return 0; +} + +static void imsm_delete(struct intel_super *super, struct dl **dlp, int index); + +static void imsm_process_update(struct supertype *st, + struct metadata_update *update) +{ + /** + * crack open the metadata_update envelope to find the update record + * update can be one of: + * update_activate_spare - a spare device has replaced a failed + * device in an array, update the disk_ord_tbl. If this disk is + * present in all member arrays then also clear the SPARE_DISK + * flag + */ + struct intel_super *super = st->sb; + struct imsm_super *mpb; + enum imsm_update_type type = *(enum imsm_update_type *) update->buf; + + /* update requires a larger buf but the allocation failed */ + if (super->next_len && !super->next_buf) { + super->next_len = 0; + return; + } + + if (super->next_buf) { + memcpy(super->next_buf, super->buf, super->len); + free(super->buf); + super->len = super->next_len; + super->buf = super->next_buf; + + super->next_len = 0; + super->next_buf = NULL; + } + + mpb = super->anchor; + + switch (type) { + case update_activate_spare: { + struct imsm_update_activate_spare *u = (void *) update->buf; + struct imsm_dev *dev = get_imsm_dev(super, u->array); + struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *migr_map; + struct active_array *a; + struct imsm_disk *disk; + __u32 status; + __u8 to_state; + struct dl *dl; + unsigned int found; + int failed; + int victim = get_imsm_disk_idx(dev, u->slot); + int i; + + for (dl = super->disks; dl; dl = dl->next) + if (dl == u->dl) + break; + + if (!dl) { + fprintf(stderr, "error: imsm_activate_spare passed " + "an unknown disk (index: %d)\n", + u->dl->index); + return; + } + + super->updates_pending++; + + /* count failures (excluding rebuilds and the victim) + * to determine map[0] state + */ + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + if (!disk || + __le32_to_cpu(disk->status) & FAILED_DISK) + failed++; + } + + /* adding a pristine spare, assign a new index */ + if (dl->index < 0) { + dl->index = super->anchor->num_disks; + super->anchor->num_disks++; + } + disk = &dl->disk; + status = __le32_to_cpu(disk->status); + status |= CONFIGURED_DISK; + status &= ~SPARE_DISK; + disk->status = __cpu_to_le32(status); + + /* mark rebuild */ + to_state = imsm_check_degraded(super, dev, failed); + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, to_state, 1); + migr_map = get_imsm_map(dev, 1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); + + /* count arrays using the victim in the metadata */ + found = 0; + for (a = st->arrays; a ; a = a->next) { + dev = get_imsm_dev(super, a->info.container_member); + for (i = 0; i < map->num_members; i++) + if (victim == get_imsm_disk_idx(dev, i)) + found++; + } + + /* delete the victim if it is no longer being + * utilized anywhere + */ + if (!found) { + struct dl **dlp; + + for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; + /* We know that 'manager' isn't touching anything, + * so it is safe to: + */ + imsm_delete(super, dlp, victim); + } + break; + } + case update_create_array: { + /* someone wants to create a new array, we need to be aware of + * a few races/collisions: + * 1/ 'Create' called by two separate instances of mdadm + * 2/ 'Create' versus 'activate_spare': mdadm has chosen + * devices that have since been assimilated via + * activate_spare. + * In the event this update can not be carried out mdadm will + * (FIX ME) notice that its update did not take hold. + */ + struct imsm_update_create_array *u = (void *) update->buf; + struct imsm_dev *dev; + struct imsm_map *map, *new_map; + unsigned long long start, end; + unsigned long long new_start, new_end; + int i; + int overlap = 0; + + /* handle racing creates: first come first serve */ + if (u->dev_idx < mpb->num_raid_devs) { + dprintf("%s: subarray %d already defined\n", + __func__, u->dev_idx); + return; + } + + /* check update is next in sequence */ + if (u->dev_idx != mpb->num_raid_devs) { + dprintf("%s: can not create array %d expected index %d\n", + __func__, u->dev_idx, mpb->num_raid_devs); + return; + } + + new_map = get_imsm_map(&u->dev, 0); + new_start = __le32_to_cpu(new_map->pba_of_lba0); + new_end = new_start + __le32_to_cpu(new_map->blocks_per_member); + + /* handle activate_spare versus create race: + * check to make sure that overlapping arrays do not include + * overalpping disks + */ + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(super, i); + map = get_imsm_map(dev, 0); + start = __le32_to_cpu(map->pba_of_lba0); + end = start + __le32_to_cpu(map->blocks_per_member); + if ((new_start >= start && new_start <= end) || + (start >= new_start && start <= new_end)) + overlap = 1; + if (overlap && disks_overlap(dev, &u->dev)) { + dprintf("%s: arrays overlap\n", __func__); + return; + } + } + /* check num_members sanity */ + if (new_map->num_members > mpb->num_disks) { + dprintf("%s: num_disks out of range\n", __func__); + return; + } + + /* check that prepare update was successful */ + if (!update->space) { + dprintf("%s: prepare update failed\n", __func__); + return; + } + + super->updates_pending++; + dev = update->space; + map = get_imsm_map(dev, 0); + update->space = NULL; + imsm_copy_dev(dev, &u->dev); + map = get_imsm_map(dev, 0); + super->dev_tbl[u->dev_idx] = dev; + mpb->num_raid_devs++; + + /* fix up flags */ + for (i = 0; i < map->num_members; i++) { + struct imsm_disk *disk; + __u32 status; + + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + status = __le32_to_cpu(disk->status); + status |= CONFIGURED_DISK; + status &= ~SPARE_DISK; + disk->status = __cpu_to_le32(status); + } + break; + } + case update_add_disk: + + /* we may be able to repair some arrays if disks are + * being added */ + if (super->add) { + struct active_array *a; + for (a = st->arrays; a; a = a->next) + a->check_degraded = 1; + } + /* add some spares to the metadata */ + while (super->add) { + struct dl *al; + + al = super->add; + super->add = al->next; + al->next = super->disks; + super->disks = al; + dprintf("%s: added %x:%x\n", + __func__, al->major, al->minor); + } + + break; + } +} + +static void imsm_prepare_update(struct supertype *st, + struct metadata_update *update) +{ + /** + * Allocate space to hold new disk entries, raid-device entries or a new + * mpb if necessary. The manager synchronously waits for updates to + * complete in the monitor, so new mpb buffers allocated here can be + * integrated by the monitor thread without worrying about live pointers + * in the manager thread. + */ + enum imsm_update_type type = *(enum imsm_update_type *) update->buf; + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + size_t buf_len; + size_t len = 0; + + switch (type) { + case update_create_array: { + struct imsm_update_create_array *u = (void *) update->buf; + + len = sizeof_imsm_dev(&u->dev, 1); + update->space = malloc(len); + break; + default: + break; + } + } + + /* check if we need a larger metadata buffer */ + if (super->next_buf) + buf_len = super->next_len; + else + buf_len = super->len; + + if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) { + /* ok we need a larger buf than what is currently allocated + * if this allocation fails process_update will notice that + * ->next_len is set and ->next_buf is NULL + */ + buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512); + if (super->next_buf) + free(super->next_buf); + + super->next_len = buf_len; + if (posix_memalign(&super->next_buf, buf_len, 512) != 0) + super->next_buf = NULL; + } +} + +/* must be called while manager is quiesced */ +static void imsm_delete(struct intel_super *super, struct dl **dlp, int index) +{ + struct imsm_super *mpb = super->anchor; + struct dl *iter; + struct imsm_dev *dev; + struct imsm_map *map; + int i, j, num_members; + __u32 ord; + + dprintf("%s: deleting device[%d] from imsm_super\n", + __func__, index); + + /* shift all indexes down one */ + for (iter = super->disks; iter; iter = iter->next) + if (iter->index > index) + iter->index--; + + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(super, i); + map = get_imsm_map(dev, 0); + num_members = map->num_members; + for (j = 0; j < num_members; j++) { + /* update ord entries being careful not to propagate + * ord-flags to the first map + */ + ord = get_imsm_ord_tbl_ent(dev, j); + + if (ord_to_idx(ord) <= index) + continue; + + map = get_imsm_map(dev, 0); + set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1)); + map = get_imsm_map(dev, 1); + if (map) + set_imsm_ord_tbl_ent(map, j, ord - 1); + } + } + + mpb->num_disks--; + super->updates_pending++; + if (*dlp) { + struct dl *dl = *dlp; + + *dlp = (*dlp)->next; + __free_imsm_disk(dl); + } +} +#endif /* MDASSEMBLE */ + struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm, + .write_init_super = write_init_super_imsm, + .validate_geometry = validate_geometry_imsm, + .add_to_super = add_to_super_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -1866,66 +3152,23 @@ struct superswitch super_imsm = { .compare_super = compare_super_imsm, .load_super = load_super_imsm, - .init_super = init_zero_imsm, + .init_super = init_super_imsm, .store_super = store_zero_imsm, .free_super = free_super_imsm, .match_metadata_desc = match_metadata_desc_imsm, - .getinfo_super_n = getinfo_super_n_imsm_container, + .container_content = container_content_imsm, - .validate_geometry = validate_geometry_imsm, - .swapuuid = 0, .external = 1, +#ifndef MDASSEMBLE /* for mdmon */ .open_new = imsm_open_new, .load_super = load_super_imsm, .set_array_state= imsm_set_array_state, .set_disk = imsm_set_disk, .sync_metadata = imsm_sync_metadata, -}; - -/* super_imsm_container is set by validate_geometry_imsm when given a - * device that is not part of any array - */ -struct superswitch super_imsm_container = { - - .validate_geometry = validate_geometry_imsm_container, - .init_super = init_super_imsm, - .add_to_super = add_to_super_imsm, - .write_init_super = write_init_super_imsm, - .getinfo_super = getinfo_super_imsm, - .getinfo_super_n = getinfo_super_n_imsm_container, - .load_super = load_super_imsm, - -#ifndef MDASSEMBLE - .examine_super = examine_super_imsm, - .brief_examine_super = brief_examine_super_imsm, - .detail_super = detail_super_imsm, - .brief_detail_super = brief_detail_super_imsm, -#endif - - .free_super = free_super_imsm, - - .container_content = container_content_imsm, - - .swapuuid = 0, - .external = 1, -}; - -struct superswitch super_imsm_volume = { - .update_super = update_super_imsm, - .init_super = init_super_imsm_volume, - .add_to_super = add_to_super_imsm_volume, - .getinfo_super = getinfo_super_imsm_volume, - .getinfo_super_n = getinfo_super_n_imsm_volume, - .write_init_super = write_init_super_imsm, - - .load_super = load_super_imsm, - .free_super = free_super_imsm, - .match_metadata_desc = match_metadata_desc_imsm_volume, - - - .validate_geometry = validate_geometry_imsm_volume, - .swapuuid = 0, - .external = 2, + .activate_spare = imsm_activate_spare, + .process_update = imsm_process_update, + .prepare_update = imsm_prepare_update, +#endif /* MDASSEMBLE */ };