X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=9c5252d0fe1cd85bb02917f9f2bdd4f86c945e38;hb=8273f55e4f18e14c7aa67ab4c52cd37fbcac2902;hp=263e398a89181ed9643cdbf1043f2723a6ae86f6;hpb=ef609477203013666d186cf913e93f4a8843b6db;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 263e398a..9c5252d0 100644 --- a/super-intel.c +++ b/super-intel.c @@ -132,7 +132,7 @@ struct intel_super { }; int updates_pending; /* count of pending updates for mdmon */ int creating_imsm; /* flag to indicate container creation */ - int creating_dev; /* index of raid device undergoing creation */ + int current_vol; /* index of raid device undergoing creation */ struct dl { struct dl *next; int index; @@ -147,37 +147,38 @@ struct extent { unsigned long long start, size; }; -static struct supertype *match_metadata_desc_imsm(char *arg) -{ - struct supertype *st; +/* definition of messages passed to imsm_process_update */ +enum imsm_update_type { + update_activate_spare, + update_create_array, +}; - if (strcmp(arg, "imsm") != 0 && - strcmp(arg, "default") != 0 - ) - return NULL; +struct imsm_update_activate_spare { + enum imsm_update_type type; + int disk_idx; + int slot; + int array; + struct imsm_update_activate_spare *next; +}; - st = malloc(sizeof(*st)); - memset(st, 0, sizeof(*st)); - st->ss = &super_imsm; - st->max_devs = IMSM_MAX_DEVICES; - st->minor_version = 0; - st->sb = NULL; - return st; -} +struct imsm_update_create_array { + enum imsm_update_type type; + struct imsm_dev dev; + int dev_idx; +}; -static struct supertype *match_metadata_desc_imsm_volume(char *arg) +static struct supertype *match_metadata_desc_imsm(char *arg) { struct supertype *st; - if (strcmp(arg, "imsm/volume") != 0 && - strcmp(arg, "raid") != 0 && + if (strcmp(arg, "imsm") != 0 && strcmp(arg, "default") != 0 ) return NULL; st = malloc(sizeof(*st)); memset(st, 0, sizeof(*st)); - st->ss = &super_imsm_volume; + st->ss = &super_imsm; st->max_devs = IMSM_MAX_DEVICES; 
st->minor_version = 0; st->sb = NULL; @@ -449,6 +450,7 @@ static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) printf("%s\n", __FUNCTION__); } +#if 0 static void get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p) { @@ -474,6 +476,7 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p) *m = strtol(minor, NULL, 0); *p = strtol(patch, NULL, 0); } +#endif static int imsm_level_to_layout(int level) { @@ -490,6 +493,34 @@ static int imsm_level_to_layout(int level) return -1; } +static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->mpb; + struct imsm_dev *dev = get_imsm_dev(mpb, super->current_vol); + struct imsm_map *map = &dev->vol.map[0]; + + info->container_member = super->current_vol; + info->array.raid_disks = map->num_members; + info->array.level = get_imsm_raid_level(map); + info->array.layout = imsm_level_to_layout(info->array.level); + info->array.md_minor = -1; + info->array.ctime = 0; + info->array.utime = 0; + info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512); + + info->data_offset = __le32_to_cpu(map->pba_of_lba0); + info->component_size = __le32_to_cpu(map->blocks_per_member); + + info->disk.major = 0; + info->disk.minor = 0; + + sprintf(info->text_version, "/%s/%d", + devnum2devname(st->container_dev), + info->container_member); +} + + static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; @@ -497,9 +528,10 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) struct imsm_disk *disk; __u32 s; - info->array.major_version = 2000; - get_imsm_numerical_version(mpb, &info->array.minor_version, - &info->array.patch_version); + if (super->current_vol >= 0) { + getinfo_super_imsm_volume(st, info); + return; + } info->array.raid_disks = mpb->num_disks; info->array.level = LEVEL_CONTAINER; info->array.layout = 0; @@ -517,9 
+549,12 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->disk.state = 0; if (super->disks) { + disk = get_imsm_disk(mpb, super->disks->index); info->disk.number = super->disks->index; info->disk.raid_disk = super->disks->index; - disk = get_imsm_disk(mpb, super->disks->index); + info->data_offset = __le32_to_cpu(disk->total_blocks) - + (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); + info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; s = __le32_to_cpu(disk->status); info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; @@ -527,35 +562,6 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) } } -static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) -{ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_dev *dev = get_imsm_dev(mpb, info->container_member); - struct imsm_map *map = &dev->vol.map[0]; - - info->array.major_version = 2000; - get_imsm_numerical_version(mpb, &info->array.minor_version, - &info->array.patch_version); - info->array.raid_disks = map->num_members; - info->array.level = get_imsm_raid_level(map); - info->array.layout = imsm_level_to_layout(info->array.level); - info->array.md_minor = -1; - info->array.ctime = 0; - info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512); - - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); - - info->disk.major = 0; - info->disk.minor = 0; - - sprintf(info->text_version, "/%s/%d", - devnum2devname(st->container_dev), - info->container_member); -} - static int update_super_imsm(struct supertype *st, struct mdinfo *info, char *update, char *devname, int verbose, int uuid_set, char *homehost) @@ -758,10 +764,9 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) size_t len, mpb_size; 
unsigned long long sectors; struct stat; - struct imsm_super anchor; + struct imsm_super *anchor; __u32 check_sum; - memset(super, 0, sizeof(*super)); get_dev_size(fd, NULL, &dsize); if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) { @@ -772,44 +777,45 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 1; } - len = sizeof(anchor); - if (read(fd, &anchor, len) != len) { + len = 512; + if (posix_memalign((void**)&anchor, 512, len) != 0) { + if (devname) + fprintf(stderr, + Name ": Failed to allocate imsm anchor buffer" + " on %s\n", devname); + return 1; + } + if (read(fd, anchor, len) != len) { if (devname) fprintf(stderr, Name ": Cannot read anchor block on %s: %s\n", devname, strerror(errno)); + free(anchor); return 1; } - if (strncmp((char *) anchor.sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) { + if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) { if (devname) fprintf(stderr, Name ": no IMSM anchor on %s\n", devname); + free(anchor); return 2; } - mpb_size = __le32_to_cpu(anchor.mpb_size); - super->mpb = malloc(mpb_size < 512 ? 
512 : mpb_size); - if (!super->mpb) { + mpb_size = __le32_to_cpu(anchor->mpb_size); + mpb_size = ROUND_UP(mpb_size, 512); + if (posix_memalign(&super->buf, 512, mpb_size) != 0) { if (devname) fprintf(stderr, Name ": unable to allocate %zu byte mpb buffer\n", mpb_size); + free(anchor); return 2; } - memcpy(super->buf, &anchor, sizeof(anchor)); - - /* read the rest of the first block */ - len = 512 - sizeof(anchor); - if (read(fd, super->buf + sizeof(anchor), len) != len) { - if (devname) - fprintf(stderr, - Name ": Cannot read anchor remainder on %s: %s\n", - devname, strerror(errno)); - return 2; - } + memcpy(super->buf, anchor, len); - sectors = mpb_sectors(&anchor) - 1; + sectors = mpb_sectors(anchor) - 1; + free(anchor); if (!sectors) return load_imsm_disk(fd, super, devname, 0); @@ -844,8 +850,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return load_imsm_disk(fd, super, devname, 0); } -struct superswitch super_imsm_container; - static void free_imsm_disks(struct intel_super *super) { while (super->disks) { @@ -887,7 +891,7 @@ static struct intel_super *alloc_super(int creating_imsm) if (super) { memset(super, 0, sizeof(*super)); super->creating_imsm = creating_imsm; - super->creating_dev = -1; + super->current_vol = -1; } return super; @@ -978,9 +982,16 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, close(dfd); } + if (st->subarray[0]) { + if (atoi(st->subarray) <= super->mpb->num_raid_devs) + super->current_vol = atoi(st->subarray); + else + return 1; + } + *sbp = super; if (st->ss == NULL) { - st->ss = &super_imsm_container; + st->ss = &super_imsm; st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; st->container_dev = fd2devnum(fd); @@ -999,6 +1010,8 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0) return 0; #endif + if (st->subarray[0]) + return 1; /* FIXME */ super = alloc_super(0); if (!super) { @@ 
-1029,49 +1042,22 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) return 0; } -static int init_zero_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, - char *homehost, int *uuid) +static __u16 info_to_blocks_per_strip(mdu_array_info_t *info) { - st->sb = NULL; - return 0; + if (info->level == 1) + return 128; + return info->chunk_size >> 9; } -static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, - char *homehost, int *uuid) +static __u32 info_to_num_data_stripes(mdu_array_info_t *info) { - /* This is primarily called by Create when creating a new array. - * We will then get add_to_super called for each component, and then - * write_init_super called to write it out to each device. - * For IMSM, Create can create on fresh devices or on a pre-existing - * array. - * To create on a pre-existing array a different method will be called. - * This one is just for fresh drives. 
- */ - struct intel_super *super; - struct imsm_super *mpb; - size_t mpb_size; - - super = alloc_super(1); - if (!super) - return 0; - mpb_size = disks_to_mpb_size(info->nr_disks); - mpb = malloc(mpb_size); - if (!mpb) { - free(super); - return 0; - } - memset(mpb, 0, mpb_size); + __u32 num_stripes; - memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE)); - memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5, - strlen(MPB_VERSION_RAID5)); - mpb->mpb_size = mpb_size; + num_stripes = (info->size * 2) / info_to_blocks_per_strip(info); + if (info->level == 1) + num_stripes /= 2; - super->mpb = mpb; - st->sb = super; - return 1; + return num_stripes; } static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, @@ -1089,8 +1075,8 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, int idx = mpb->num_raid_devs; int i; unsigned long long array_blocks; - unsigned long long sz; __u32 offset = 0; + size_t size_old, size_new; if (mpb->num_raid_devs >= 2) { fprintf(stderr, Name": This imsm-container already has the " @@ -1098,7 +1084,26 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, return 0; } - super->creating_dev = idx; + /* ensure the mpb is large enough for the new data */ + size_old = __le32_to_cpu(mpb->mpb_size); + size_new = disks_to_mpb_size(info->nr_disks); + if (size_new > size_old) { + void *mpb_new; + size_t size_round = ROUND_UP(size_new, 512); + + if (posix_memalign(&mpb_new, 512, size_round) != 0) { + fprintf(stderr, Name": could not allocate new mpb\n"); + return 0; + } + memcpy(mpb_new, mpb, size_old); + free(mpb); + mpb = mpb_new; + super->mpb = mpb_new; + mpb->mpb_size = __cpu_to_le32(size_new); + memset(mpb_new + size_old, 0, size_round - size_old); + } + super->current_vol = idx; + sprintf(st->subarray, "%d", idx); mpb->num_raid_devs++; dev = get_imsm_dev(mpb, idx); strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); @@ -1122,16 +1127,22 @@ static int 
init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } map = &vol->map[0]; map->pba_of_lba0 = __cpu_to_le32(offset); - sz = info->size * 2; - map->blocks_per_member = __cpu_to_le32(sz); - map->blocks_per_strip = __cpu_to_le16(info->chunk_size >> 9); - map->num_data_stripes = __cpu_to_le32(sz / (info->chunk_size >> 9)); + map->blocks_per_member = __cpu_to_le32(info->size * 2); + map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); + map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info)); map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL; + + if (info->level == 1 && info->raid_disks > 2) { + fprintf(stderr, Name": imsm does not support more than 2 disks" + "in a raid1 volume\n"); + return 0; + } if (info->level == 10) map->raid_level = 1; else map->raid_level = info->level; + map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { /* initialized in add_to_super */ @@ -1141,6 +1152,78 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, return 1; } +static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, + unsigned long long size, char *name, + char *homehost, int *uuid) +{ + /* This is primarily called by Create when creating a new array. + * We will then get add_to_super called for each component, and then + * write_init_super called to write it out to each device. + * For IMSM, Create can create on fresh devices or on a pre-existing + * array. + * To create on a pre-existing array a different method will be called. + * This one is just for fresh drives. 
+ */ + struct intel_super *super; + struct imsm_super *mpb; + size_t mpb_size; + + if (!info) { + st->sb = NULL; + return 0; + } + if (st->sb) + return init_super_imsm_volume(st, info, size, name, homehost, + uuid); + + super = alloc_super(1); + if (!super) + return 0; + mpb_size = disks_to_mpb_size(info->nr_disks); + if (posix_memalign(&super->buf, 512, mpb_size) != 0) { + free(super); + return 0; + } + mpb = super->buf; + memset(mpb, 0, mpb_size); + + memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE)); + memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5, + strlen(MPB_VERSION_RAID5)); + mpb->mpb_size = mpb_size; + + st->sb = super; + return 1; +} + +static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, + int fd, char *devname) +{ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->mpb; + struct dl *dl; + struct imsm_dev *dev; + struct imsm_map *map; + struct imsm_disk *disk; + __u32 status; + + dev = get_imsm_dev(mpb, super->current_vol); + map = &dev->vol.map[0]; + + for (dl = super->disks; dl ; dl = dl->next) + if (dl->major == dk->major && + dl->minor == dk->minor) + break; + if (!dl || ! 
(dk->state & (1<disk_ord_tbl[dk->number] = __cpu_to_le32(dl->index); + + disk = get_imsm_disk(mpb, dl->index); + status = CONFIGURED_DISK | USABLE_DISK; + disk->status = __cpu_to_le32(status); +} + static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -1153,6 +1236,11 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int rv; struct stat stb; + if (super->current_vol >= 0) { + add_to_super_imsm_volume(st, dk, fd, devname); + return; + } + fstat(fd, &stb); dd = malloc(sizeof(*dd)); if (!dd) { @@ -1197,39 +1285,6 @@ static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, super->disks = dd; } -static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, - int fd, char *devname) -{ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct dl *dl; - struct imsm_dev *dev; - struct imsm_map *map; - struct imsm_disk *disk; - __u32 status; - - if (super->creating_dev == -1) { - fprintf(stderr, Name ": no active raid device\n"); - abort(); - } - - dev = get_imsm_dev(mpb, super->creating_dev); - map = &dev->vol.map[0]; - - for (dl = super->disks; dl ; dl = dl->next) - if (dl->major == dk->major && - dl->minor == dk->minor) - break; - if (!dl || ! 
(dk->state & (1<disk_ord_tbl[dk->number] = __cpu_to_le32(dl->index); - - disk = get_imsm_disk(mpb, dl->index); - status = CONFIGURED_DISK | USABLE_DISK; - disk->status = __cpu_to_le32(status); -} - static int store_imsm_mpb(int fd, struct intel_super *super); static int write_super_imsm(struct intel_super *super, int doclose) @@ -1265,13 +1320,54 @@ static int write_super_imsm(struct intel_super *super, int doclose) static int write_init_super_imsm(struct supertype *st) { - return write_super_imsm(st->sb, 1); + if (st->update_tail) { + /* queue the recently created array as a metadata update */ + size_t len; + struct imsm_update_create_array *u; + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->mpb; + struct imsm_dev *dev; + struct imsm_map *map; + struct dl *d; + + if (super->current_vol < 0 || + !(dev = get_imsm_dev(mpb, super->current_vol))) { + fprintf(stderr, "%s: could not determine sub-array\n", + __func__); + return 1; + } + + + map = &dev->vol.map[0]; + len = sizeof(*u) + sizeof(__u32) * (map->num_members - 1); + u = malloc(len); + if (!u) { + fprintf(stderr, "%s: failed to allocate update buffer\n", + __func__); + return 1; + } + + u->type = update_create_array; + u->dev_idx = super->current_vol; + memcpy(&u->dev, dev, sizeof(*dev)); + memcpy(u->dev.vol.map[0].disk_ord_tbl, map->disk_ord_tbl, + sizeof(__u32) * map->num_members); + append_metadata_update(st, u, len); + + for (d = super->disks; d ; d = d->next) { + close(d->fd); + d->fd = -1; + } + + return 0; + } else + return write_super_imsm(st->sb, 1); } static int store_zero_imsm(struct supertype *st, int fd) { unsigned long long dsize; - char buf[512]; + void *buf; get_dev_size(fd, NULL, &dsize); @@ -1279,146 +1375,23 @@ static int store_zero_imsm(struct supertype *st, int fd) if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) return 1; + if (posix_memalign(&buf, 512, 512) != 0) + return 1; + memset(buf, 0, sizeof(buf)); if (write(fd, buf, sizeof(buf)) != sizeof(buf)) return 1; - 
return 0; } -static void getinfo_super_n_imsm_container(struct supertype *st, struct mdinfo *info) +static int validate_geometry_imsm_container(struct supertype *st, int level, + int layout, int raiddisks, int chunk, + unsigned long long size, char *dev, + unsigned long long *freesize, + int verbose) { - /* just need offset and size... - * of the metadata - */ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_disk *disk = get_imsm_disk(mpb, info->disk.number); - - info->data_offset = __le32_to_cpu(disk->total_blocks) - - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS); - info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; -} - -static void getinfo_super_n_imsm_volume(struct supertype *st, struct mdinfo *info) -{ - /* Find the particular details for info->disk.raid_disk. - * This includes data_offset, component_size, - */ - struct intel_super *super = st->sb; - struct imsm_super *mpb = super->mpb; - struct imsm_dev *dev = get_imsm_dev(mpb, super->creating_dev); - struct imsm_map *map = &dev->vol.map[0]; - - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); -} - -static int validate_geometry_imsm(struct supertype *st, int level, int layout, - int raiddisks, int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) -{ - int fd, cfd; - struct mdinfo *sra; - - /* if given unused devices create a container - * if given given devices in a container create a member volume - */ - if (level == LEVEL_CONTAINER) { - st->ss = &super_imsm_container; - if (dev) { - /* validate the container, dev == NULL */ - int rv = st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; - } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); - } - - if (st->sb) { - /* creating in a given container */ - st->ss = &super_imsm_volume; - if (dev) { - int rv = 
st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; - } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); - } - - /* limit creation to the following levels */ - if (!dev) - switch (level) { - case 0: - case 1: - case 10: - case 5: - break; - default: - return 1; - } - - /* This device needs to be a device in an 'imsm' container */ - fd = open(dev, O_RDONLY|O_EXCL, 0); - if (fd >= 0) { - fprintf(stderr, - Name ": Cannot create this array on device %s\n", - dev); - close(fd); - return 0; - } - if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); - return 0; - } - /* Well, it is in use by someone, maybe an 'imsm' container. */ - cfd = open_container(fd); - if (cfd < 0) { - close(fd); - fprintf(stderr, Name ": Cannot use %s: It is busy\n", - dev); - return 0; - } - sra = sysfs_read(cfd, 0, GET_VERSION); - close(fd); - if (sra && sra->array.major_version == -1 && - strcmp(sra->text_version, "imsm") == 0) { - /* This is a member of a imsm container. 
Load the container - * and try to create a volume - */ - struct intel_super *super; - st->ss = &super_imsm_volume; - if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) { - st->sb = super; - st->container_dev = fd2devnum(cfd); - close(cfd); - return st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, size, - dev, freesize); - } - close(cfd); - } else /* may belong to another container */ - return 0; - - return 1; -} - -static int validate_geometry_imsm_container(struct supertype *st, int level, - int layout, int raiddisks, int chunk, - unsigned long long size, char *dev, - unsigned long long *freesize) -{ - int fd; - unsigned long long ldsize; + int fd; + unsigned long long ldsize; if (level != LEVEL_CONTAINER) return 0; @@ -1427,8 +1400,9 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, fd = open(dev, O_RDONLY|O_EXCL, 0); if (fd < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); + if (verbose) + fprintf(stderr, Name ": imsm: Cannot open %s: %s\n", + dev, strerror(errno)); return 0; } if (!get_dev_size(fd, dev, &ldsize)) { @@ -1448,7 +1422,8 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, static int validate_geometry_imsm_volume(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, - unsigned long long *freesize) + unsigned long long *freesize, + int verbose) { struct stat stb; struct intel_super *super = st->sb; @@ -1462,8 +1437,9 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 0; if (level == 1 && raiddisks > 2) { - fprintf(stderr, Name ": imsm does not support more than 2 " - "in a raid1 configuration\n"); + if (verbose) + fprintf(stderr, Name ": imsm does not support more " + "than 2 in a raid1 configuration\n"); return 0; } @@ -1473,15 +1449,18 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, if (!dev) { /* General test: make 
sure there is space for - * 'raiddisks' device extents of size 'size'. + * 'raiddisks' device extents of size 'size' at a given + * offset */ unsigned long long minsize = size*2 /* convert to blocks */; + unsigned long long start_offset = ~0ULL; int dcnt = 0; if (minsize == 0) minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; for (dl = super->disks; dl ; dl = dl->next) { int found = 0; + pos = 0; i = 0; e = get_extents(super, dl); if (!e) continue; @@ -1490,6 +1469,13 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, esize = e[i].start - pos; if (esize >= minsize) found = 1; + if (found && start_offset == ~0ULL) { + start_offset = pos; + break; + } else if (found && pos != start_offset) { + found = 0; + break; + } pos = e[i].start + e[i].size; i++; } while (e[i-1].size); @@ -1498,9 +1484,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, free(e); } if (dcnt < raiddisks) { - fprintf(stderr, Name ": Not enough devices with space " - "for this array (%d < %d)\n", - dcnt, raiddisks); + if (verbose) + fprintf(stderr, Name ": imsm: Not enough " + "devices with space for this array " + "(%d < %d)\n", + dcnt, raiddisks); return 0; } return 1; @@ -1516,8 +1504,9 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, break; } if (!dl) { - fprintf(stderr, Name ": %s is not in the same imsm set\n", - dev); + if (verbose) + fprintf(stderr, Name ": %s is not in the " + "same imsm set\n", dev); return 0; } e = get_extents(super, dl); @@ -1536,6 +1525,94 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 1; } +static int validate_geometry_imsm(struct supertype *st, int level, int layout, + int raiddisks, int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose) +{ + int fd, cfd; + struct mdinfo *sra; + + /* if given unused devices create a container + * if given given devices in a container create a member volume + */ + if (level == 
LEVEL_CONTAINER) { + /* Must be a fresh device to add to a container */ + return validate_geometry_imsm_container(st, level, layout, + raiddisks, chunk, size, + dev, freesize, + verbose); + } + + if (st->sb) { + /* creating in a given container */ + return validate_geometry_imsm_volume(st, level, layout, + raiddisks, chunk, size, + dev, freesize, verbose); + } + + /* limit creation to the following levels */ + if (!dev) + switch (level) { + case 0: + case 1: + case 10: + case 5: + break; + default: + return 1; + } + + /* This device needs to be a device in an 'imsm' container */ + fd = open(dev, O_RDONLY|O_EXCL, 0); + if (fd >= 0) { + if (verbose) + fprintf(stderr, + Name ": Cannot create this array on device %s\n", + dev); + close(fd); + return 0; + } + if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { + if (verbose) + fprintf(stderr, Name ": Cannot open %s: %s\n", + dev, strerror(errno)); + return 0; + } + /* Well, it is in use by someone, maybe an 'imsm' container. */ + cfd = open_container(fd); + if (cfd < 0) { + close(fd); + if (verbose) + fprintf(stderr, Name ": Cannot use %s: It is busy\n", + dev); + return 0; + } + sra = sysfs_read(cfd, 0, GET_VERSION); + close(fd); + if (sra && sra->array.major_version == -1 && + strcmp(sra->text_version, "imsm") == 0) { + /* This is a member of a imsm container. 
Load the container + * and try to create a volume + */ + struct intel_super *super; + + if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) { + st->sb = super; + st->container_dev = fd2devnum(cfd); + close(cfd); + return validate_geometry_imsm_volume(st, level, layout, + raiddisks, chunk, + size, dev, + freesize, verbose); + } + close(cfd); + } else /* may belong to another container */ + return 0; + + return 1; +} + static struct mdinfo *container_content_imsm(struct supertype *st) { /* Given a container loaded by load_super_imsm_all, @@ -1564,9 +1641,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) this->next = rest; rest = this; - this->array.major_version = 2000; - get_imsm_numerical_version(mpb, &this->array.minor_version, - &this->array.patch_version); this->array.level = get_imsm_raid_level(map); this->array.raid_disks = map->num_members; this->array.layout = imsm_level_to_layout(this->array.level); @@ -1646,6 +1720,15 @@ static struct mdinfo *container_content_imsm(struct supertype *st) static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { + struct intel_super *super = c->sb; + struct imsm_super *mpb = super->mpb; + + if (atoi(inst) + 1 > mpb->num_raid_devs) { + fprintf(stderr, "%s: subarry index %d, out of range\n", + __func__, atoi(inst)); + return -ENODEV; + } + dprintf("imsm: open_new %s\n", inst); a->info.container_member = atoi(inst); return 0; @@ -1821,7 +1904,7 @@ static int store_imsm_mpb(int fd, struct intel_super *super) if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) return 1; - if (write(fd, super->buf + 512, mpb_size - 512) != mpb_size - 512) + if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors) return 1; } @@ -1832,8 +1915,6 @@ static int store_imsm_mpb(int fd, struct intel_super *super) if (write(fd, super->buf, 512) != 512) return 1; - fsync(fd); - return 0; } @@ -1849,12 +1930,403 @@ static void imsm_sync_metadata(struct supertype *container) 
super->updates_pending = 0; } +static struct mdinfo *imsm_activate_spare(struct active_array *a, + struct metadata_update **updates) +{ + /** + * Take a device that is marked spare in the metadata and use it to + * replace a failed/vacant slot in an array. There may be a case where + * a device is failed in one array but active in a second. + * imsm_process_update catches this case and does not clear the SPARE_DISK + * flag, allowing the second array to start using the device on failure. + * SPARE_DISK is cleared when all arrays are using a device. + * + * Returns the list of mdinfo entries describing the spares chosen (NULL + * when none was activated). On success one metadata_update carrying an + * imsm_update_activate_spare record per chosen spare is pushed onto the + * front of *updates. + * + * FIXME: is this a valid use of SPARE_DISK? + */ + + struct intel_super *super = a->container->sb; + struct imsm_super *mpb = super->mpb; + int inst = a->info.container_member; + struct imsm_dev *dev = get_imsm_dev(mpb, inst); + struct imsm_map *map = dev->vol.map; + int failed = a->info.array.raid_disks; + struct mdinfo *rv = NULL; + struct mdinfo *d; + struct mdinfo *di; + struct metadata_update *mu; + struct dl *dl; + struct imsm_update_activate_spare *u; + int num_spares = 0; + int i; + + for (d = a->info.devs ; d ; d = d->next) { + if ((d->curr_state & DS_FAULTY) && + d->state_fd >= 0) + /* wait for Removal to happen */ + return NULL; + if (d->state_fd >= 0) + failed--; + } + + dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", + inst, failed, a->info.array.raid_disks, a->info.array.level); + if (imsm_check_degraded(mpb, inst, failed) != IMSM_T_STATE_DEGRADED) + return NULL; + + /* For each slot, if it is not working, find a spare */ + dl = super->disks; + for (i = 0; i < a->info.array.raid_disks; i++) { + for (d = a->info.devs ; d ; d = d->next) + if (d->disk.raid_disk == i) + break; + dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0); + if (d && (d->state_fd >= 0)) + continue; + + /* OK, this device needs recovery.
Find a spare */ + for ( ; dl ; dl = dl->next) { + unsigned long long pos; + struct mdinfo *d2; + struct extent *ex; + struct imsm_disk *disk; + int j; + int found; + __u32 array_start; + + /* If in this array, skip */ + for (d2 = a->info.devs ; d2 ; d2 = d2->next) + if (d2->disk.major == dl->major && + d2->disk.minor == dl->minor) { + dprintf("%x:%x already in array\n", dl->major, dl->minor); + break; + } + if (d2) + continue; + + /* is this unused device marked as a spare? */ + disk = get_imsm_disk(mpb, dl->index); + if (!(__le32_to_cpu(disk->status) & SPARE_DISK)) + continue; + + /* We are allowed to use this device - is there space? + * We need a->info.component_size sectors */ + ex = get_extents(super, dl); + if (!ex) { + dprintf("cannot get extents\n"); + continue; + } + found = 0; + j = 0; + pos = 0; + array_start = __le32_to_cpu(map->pba_of_lba0); + + do { + /* check that we can start at pba_of_lba0 with + * a->info.component_size of space; 'pos' is the end of + * the previous extent, i.e. the start of the current gap + */ + if (array_start >= pos && + array_start + a->info.component_size < ex[j].start) { + found = 1; + break; + } + pos = ex[j].start + ex[j].size; + j++; + + } while (ex[j-1].size); + + free(ex); + if (!found) { + dprintf("%x:%x does not have %llu at %d\n", + dl->major, dl->minor, + a->info.component_size, + __le32_to_cpu(map->pba_of_lba0)); + /* No room */ + continue; + } + + /* found a usable disk with enough space */ + di = malloc(sizeof(*di)); + if (!di) { + dprintf("imsm: out of memory activating spare\n"); + break; + } + memset(di, 0, sizeof(*di)); + di->disk.number = dl->index; + di->disk.raid_disk = i; + di->disk.major = dl->major; + di->disk.minor = dl->minor; + di->disk.state = 0; + di->data_offset = array_start; + di->component_size = a->info.component_size; + di->container_member = inst; + di->next = rv; + rv = di; + num_spares++; + dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, + i, pos); + + /* this disk is now claimed but is not yet in a->info.devs and + * still carries SPARE_DISK, so step past it; otherwise the + * next failed slot would be handed the very same disk + */ + dl = dl->next; + break; + } + } + + if (!rv) + /* No spares found */ + return rv; + /* Now 'rv' has a list of devices to return.
+ * Create a metadata_update record to update the + * disk_ord_tbl for the array + */ + mu = malloc(sizeof(*mu)); + if (mu) + mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares); + if (!mu || !mu->buf) { + /* without an update record the replacement cannot be recorded + * in the metadata, so release the candidates and report none + */ + free(mu); + while (rv) { + di = rv->next; + free(rv); + rv = di; + } + return NULL; + } + mu->space = NULL; + mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; + mu->next = *updates; + u = (struct imsm_update_activate_spare *) mu->buf; + + /* one record per spare, chained through ->next inside mu->buf */ + for (di = rv ; di ; di = di->next) { + u->type = update_activate_spare; + u->disk_idx = di->disk.number; + u->slot = di->disk.raid_disk; + u->array = inst; + u->next = u + 1; + u++; + } + (u-1)->next = NULL; + *updates = mu; + + return rv; +} + +/* count the bits that are set in 'field' */ +static int weight(unsigned int field) +{ + int weight; + + /* each pass clears the lowest set bit */ + for (weight = 0; field; weight++) + field &= field - 1; + + return weight; +} + +/* return 1 if any member disk index of m1 (as reported by + * get_imsm_disk_idx) is also a member of m2, otherwise 0 + */ +static int disks_overlap(struct imsm_map *m1, struct imsm_map *m2) +{ + int i; + int j; + int idx; + + for (i = 0; i < m1->num_members; i++) { + idx = get_imsm_disk_idx(m1, i); + for (j = 0; j < m2->num_members; j++) + if (idx == get_imsm_disk_idx(m2, j)) + return 1; + } + + return 0; +} + +static void imsm_process_update(struct supertype *st, + struct metadata_update *update) +{ + /** + * crack open the metadata_update envelope to find the update record + * update can be one of: + * update_activate_spare - a spare device has replaced a failed + * device in an array, update the disk_ord_tbl.
If this disk is + * present in all member arrays then also clear the SPARE_DISK + * flag + * update_create_array - append a new raid device to the mpb, provided + * it is the next index in sequence and does not collide with an + * existing array on a shared disk + * + * Every change that is applied bumps super->updates_pending. + * + * NOTE(review): for update_activate_spare only the first record in + * update->buf is applied, while imsm_activate_spare may chain several + * records via ->next - confirm whether the rest should be walked here. + */ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->mpb; + enum imsm_update_type type = *(enum imsm_update_type *) update->buf; + + switch (type) { + case update_activate_spare: { + struct imsm_update_activate_spare *u = (void *) update->buf; + struct imsm_dev *dev = get_imsm_dev(mpb, u->array); + struct imsm_map *map = &dev->vol.map[0]; + struct active_array *a; + struct imsm_disk *disk; + __u32 status; + struct dl *dl; + struct mdinfo *d; + unsigned int members; + unsigned int found; + int victim; + int i; + + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == u->disk_idx) + break; + + if (!dl) { + fprintf(stderr, "error: imsm_activate_spare passed " + "an unknown disk_idx: %d\n", u->disk_idx); + return; + } + + super->updates_pending++; + + /* remember which disk is being displaced from the slot */ + victim = get_imsm_disk_idx(map, u->slot); + map->disk_ord_tbl[u->slot] = __cpu_to_le32(u->disk_idx); + disk = get_imsm_disk(mpb, u->disk_idx); + status = __le32_to_cpu(disk->status); + status |= CONFIGURED_DISK; + disk->status = __cpu_to_le32(status); + + /* map unique/live arrays using the spare */ + members = 0; + found = 0; + for (a = st->arrays; a; a = a->next) { + int inst = a->info.container_member; + + dev = get_imsm_dev(mpb, inst); + map = &dev->vol.map[0]; + if (map->raid_level > 0) + members |= 1 << inst; + for (d = a->info.devs; d; d = d->next) + if (d->disk.major == dl->major && + d->disk.minor == dl->minor) + found |= 1 << inst; + } + + /* until all arrays that can absorb this disk have absorbed + * this disk it can still be considered a spare + */ + if (weight(found) >= weight(members)) { + status = __le32_to_cpu(disk->status); + status &= ~SPARE_DISK; + disk->status = __cpu_to_le32(status); + } + + /* count arrays using the victim in the metadata */ + found = 0; + for (a = st->arrays; a ; a = a->next) { + dev = get_imsm_dev(mpb, a->info.container_member); + map = &dev->vol.map[0]; + for
(i = 0; i < map->num_members; i++) + if (victim == get_imsm_disk_idx(map, i)) + found++; + } + + /* clear some flags if the victim is no longer being + * utilized anywhere + */ + disk = get_imsm_disk(mpb, victim); + if (!found) { + status = __le32_to_cpu(disk->status); + status &= ~(CONFIGURED_DISK | USABLE_DISK); + disk->status = __cpu_to_le32(status); + } + break; + } + case update_create_array: { + /* someone wants to create a new array, we need to be aware of + * a few races/collisions: + * 1/ 'Create' called by two separate instances of mdadm + * 2/ 'Create' versus 'activate_spare': mdadm has chosen + * devices that have since been assimilated via + * activate_spare. + * In the event this update can not be carried out mdadm will + * (FIX ME) notice that its update did not take hold. + */ + struct imsm_update_create_array *u = (void *) update->buf; + struct imsm_dev *dev; + struct imsm_map *map, *new_map; + unsigned long long start, end; + unsigned long long new_start, new_end; + int i; + int overlap = 0; + + /* handle racing creates: first come first serve */ + if (u->dev_idx < mpb->num_raid_devs) { + dprintf("%s: subarray %d already defined\n", + __func__, u->dev_idx); + return; + } + + /* check update is next in sequence */ + if (u->dev_idx != mpb->num_raid_devs) { + dprintf("%s: can not create arrays out of sequence\n", + __func__); + return; + } + + new_map = &u->dev.vol.map[0]; + new_start = __le32_to_cpu(new_map->pba_of_lba0); + new_end = new_start + __le32_to_cpu(new_map->blocks_per_member); + + /* handle activate_spare versus create race: + * check to make sure that overlapping arrays do not include + * overlapping disks + */ + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(mpb, i); + map = &dev->vol.map[0]; + start = __le32_to_cpu(map->pba_of_lba0); + end = start + __le32_to_cpu(map->blocks_per_member); + if ((new_start >= start && new_start <= end) || + (start >= new_start && start <= new_end)) + overlap = 1; + else + /* this array's range does not cross the new one, so + * sharing disks with it is fine; 'overlap' stays set + * only to drive the SPARE_DISK fix-up below + */ + continue; + if
(disks_overlap(map, new_map)) { + dprintf("%s: arrays overlap\n", __func__); + return; + } + } + /* check num_members sanity */ + if (new_map->num_members > mpb->num_disks) { + dprintf("%s: num_disks out of range\n", __func__); + return; + } + + super->updates_pending++; + mpb->num_raid_devs++; + dev = get_imsm_dev(mpb, u->dev_idx); + memcpy(dev, &u->dev, sizeof(*dev)); + map = &dev->vol.map[0]; + memcpy(map->disk_ord_tbl, new_map->disk_ord_tbl, + sizeof(__u32) * new_map->num_members); + + /* fix up flags, if arrays overlap then the drives can not be + * spares + */ + for (i = 0; i < map->num_members; i++) { + struct imsm_disk *disk; + __u32 status; + + disk = get_imsm_disk(mpb, get_imsm_disk_idx(map, i)); + status = __le32_to_cpu(disk->status); + status |= CONFIGURED_DISK; + if (overlap) + status &= ~SPARE_DISK; + disk->status = __cpu_to_le32(status); + } + break; + } + } +} + +static void imsm_prepare_update(struct supertype *st, + struct metadata_update *update) +{ + /* Allocate space to hold a new mpb if necessary. We currently + * allocate enough to hold 2 subarrays for the given number of disks. + * This may not be sufficient iff reshaping. + * + * FIX ME handle the reshape case. + * + * The monitor will be able to safely change super->mpb by arranging + * for it to be freed in check_update_queue(). I.e. the monitor thread + * will start using the new pointer and the manager can continue to use + * the old value until check_update_queue() runs.
+ * + * For now this is a stub: nothing is allocated and 'update' is left + * untouched. + */ + + return; +} + struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm, + .write_init_super = write_init_super_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -1866,15 +2338,14 @@ struct superswitch super_imsm = { .compare_super = compare_super_imsm, .load_super = load_super_imsm, - .init_super = init_zero_imsm, + .init_super = init_super_imsm, + .add_to_super = add_to_super_imsm, .store_super = store_zero_imsm, .free_super = free_super_imsm, .match_metadata_desc = match_metadata_desc_imsm, - .getinfo_super_n = getinfo_super_n_imsm_container, + .container_content = container_content_imsm, .validate_geometry = validate_geometry_imsm, - .major = 2000, - .swapuuid = 0, .external = 1, /* for mdmon */ @@ -1883,52 +2354,7 @@ struct superswitch super_imsm = { .set_array_state= imsm_set_array_state, .set_disk = imsm_set_disk, .sync_metadata = imsm_sync_metadata, -}; - -/* super_imsm_container is set by validate_geometry_imsm when given a - * device that is not part of any array - */ -struct superswitch super_imsm_container = { - - .validate_geometry = validate_geometry_imsm_container, - .init_super = init_super_imsm, - .add_to_super = add_to_super_imsm, - .write_init_super = write_init_super_imsm, - .getinfo_super = getinfo_super_imsm, - .getinfo_super_n = getinfo_super_n_imsm_container, - .load_super = load_super_imsm, - -#ifndef MDASSEMBLE - .examine_super = examine_super_imsm, - .brief_examine_super = brief_examine_super_imsm, - .detail_super = detail_super_imsm, - .brief_detail_super = brief_detail_super_imsm, -#endif - - .free_super = free_super_imsm, - - .container_content = container_content_imsm, - - .major = 2000, - .swapuuid = 0, - .external = 1, -}; - -struct superswitch super_imsm_volume = { - .update_super = update_super_imsm, - .init_super =
init_super_imsm_volume, - .add_to_super = add_to_super_imsm_volume, - .getinfo_super = getinfo_super_imsm_volume, - .getinfo_super_n = getinfo_super_n_imsm_volume, - .write_init_super = write_init_super_imsm, - - .load_super = load_super_imsm, - .free_super = free_super_imsm, - .match_metadata_desc = match_metadata_desc_imsm_volume, - - - .validate_geometry = validate_geometry_imsm_volume, - .major = 2001, - .swapuuid = 0, - .external = 2, + .activate_spare = imsm_activate_spare, + .process_update = imsm_process_update, + .prepare_update = imsm_prepare_update, };