X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=super-ddf.c;h=14f83304bde10c8f4ddff2bc17d5f4eb1d08dce5;hp=0d2a45617ca52c8ce494645c3d297d56fceae632;hb=d23534e4646313a67296b295666d165a87bb2c92;hpb=1ba6bff90226a609700b8e78345a41bbe1997845 diff --git a/super-ddf.c b/super-ddf.c index 0d2a4561..14f83304 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006-2007 Neil Brown + * Copyright (C) 2006-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -31,11 +31,6 @@ #include "sha1.h" #include -static inline int ROUND_UP(int a, int base) -{ - return ((a+base-1)/base)*base; -} - /* a non-official T10 name for creation GUIDs */ static char T10[] = "Linux-MD"; @@ -71,7 +66,7 @@ unsigned long crc32( #define DDF_CONCAT 0x1f #define DDF_RAID5E 0x15 #define DDF_RAID5EE 0x25 -#define DDF_RAID6 0x16 /* Vendor unique layout */ +#define DDF_RAID6 0x06 /* Raid Level Qualifier (RLQ) */ #define DDF_RAID0_SIMPLE 0x00 @@ -83,6 +78,7 @@ unsigned long crc32( #define DDF_RAID4_N 0x01 /* parity in last extent */ /* these apply to raid5e and raid5ee as well */ #define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */ +#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */ #define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */ #define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */ @@ -108,13 +104,14 @@ unsigned long crc32( #define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C) #define DDF_GUID_LEN 24 -#define DDF_REVISION "01.00.00" +#define DDF_REVISION_0 "01.00.00" +#define DDF_REVISION_2 "01.02.00" struct ddf_header { __u32 magic; /* DDF_HEADER_MAGIC */ __u32 crc; char guid[DDF_GUID_LEN]; - char revision[8]; /* 01.00.00 */ + char revision[8]; /* 01.02.00 */ __u32 seq; /* starts at '1' */ __u32 timestamp; __u8 openflag; @@ -393,36 +390,55 @@ struct bad_block_log { * built in Create or Assemble to describe the whole array. */ struct ddf_super { - struct ddf_header anchor, primary, secondary, *active; + struct ddf_header anchor, primary, secondary; struct ddf_controller_data controller; + struct ddf_header *active; struct phys_disk *phys; struct virtual_disk *virt; int pdsize, vdsize; int max_part, mppe, conf_rec_len; + int currentdev; + int updates_pending; struct vcl { - struct vcl *next; - __u64 *lba_offset; /* location in 'conf' of - * the lba table */ + union { + char space[512]; + struct { + struct vcl *next; + __u64 *lba_offset; /* location in 'conf' of + * the lba table */ + int vcnum; /* index into ->virt */ + __u64 *block_sizes; /* NULL if all the same */ + }; + }; struct vd_config conf; - } *conflist, *newconf; - int conf_num; /* Index into 'virt' of entry matching 'newconf' */ + } *conflist, *currentconf; struct dl { - struct dl *next; + union { + char space[512]; + struct { + struct dl *next; + int major, minor; + char *devname; + int fd; + unsigned long long size; /* sectors */ + int pdnum; /* index in ->phys */ + struct spare_assign *spare; + void *mdupdate; /* hold metadata update */ + + /* These fields used by auto-layout */ + int raiddisk; /* slot to fill in autolayout */ + __u64 esize; + }; + }; struct disk_data disk; - int major, minor; - char *devname; - int fd; - int pdnum; /* index in ->phys */ - struct spare_assign *spare; struct vcl *vlist[0]; /* max_part in size */ - } *dlist; + } *dlist, *add_list; }; #ifndef offsetof #define offsetof(t,f) ((size_t)&(((t*)0)->f)) #endif -extern struct superswitch super_ddf_container, super_ddf_bvd, super_ddf; static int calc_crc(void *buf, int len) { @@ -434,7 +450,10 @@ static int calc_crc(void *buf, int len) newcrc = crc32(0, buf, len); ddf->crc = oldcrc; - return newcrc; + /* The crc is store (like everything) bigendian, so convert + * here for simplicity + */ + return __cpu_to_be32(newcrc); } static int load_ddf_header(int fd, unsigned long long lba, @@ -492,8 +511,9 @@ static void *load_section(int fd, struct ddf_super *super, void *buf, /* All pre-allocated sections are a single block */ if (len != 1) return NULL; - } else - buf = malloc(len<<9); + } else if (posix_memalign(&buf, 512, len<<9) != 0) + buf = NULL; + if (!buf) return NULL; @@ -547,11 +567,12 @@ static int load_ddf_headers(int fd, struct ddf_super *super, char *devname) devname); return 2; } - if (memcmp(super->anchor.revision, DDF_REVISION, 8) != 0) { + if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 && + memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) { if (devname) fprintf(stderr, Name ": can only support super revision" - " %.8s, not %.8s on %s\n", - DDF_REVISION, super->anchor.revision, devname); + " %.8s and earlier, not %.8s on %s\n", + DDF_REVISION_2, super->anchor.revision,devname); return 2; } if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba), @@ -622,11 +643,19 @@ static int load_ddf_local(int fd, struct ddf_super *super, struct stat stb; char *conf; int i; + int confsec; int vnum; + int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries); + unsigned long long dsize; /* First the local disk info */ - dl = malloc(sizeof(*dl) + - (super->max_part) * sizeof(dl->vlist[0])); + if (posix_memalign((void**)&dl, 512, + sizeof(*dl) + + (super->max_part) * sizeof(dl->vlist[0])) != 0) { + fprintf(stderr, Name ": %s could not allocate disk info buffer\n", + __func__); + return 1; + } load_section(fd, super, &dl->disk, super->active->data_section_offset, @@ -639,17 +668,20 @@ static int load_ddf_local(int fd, struct ddf_super *super, dl->minor = minor(stb.st_rdev); dl->next = super->dlist; dl->fd = keep ? fd : -1; + + dl->size = 0; + if (get_dev_size(fd, devname, &dsize)) + dl->size = dsize >> 9; dl->spare = NULL; for (i=0 ; i < super->max_part ; i++) dl->vlist[i] = NULL; super->dlist = dl; - dl->pdnum = 0; + dl->pdnum = -1; for (i=0; i < __be16_to_cpu(super->active->max_pd_entries); i++) if (memcmp(super->phys->entries[i].guid, dl->disk.guid, DDF_GUID_LEN) == 0) dl->pdnum = i; - /* Now the config list. */ /* 'conf' is an array of config entries, some of which are * probably invalid. Those which are good need to be copied into @@ -662,17 +694,24 @@ static int load_ddf_local(int fd, struct ddf_super *super, 0); vnum = 0; - for (i = 0; - i < __be32_to_cpu(super->active->config_section_length); - i += super->conf_rec_len) { + for (confsec = 0; + confsec < __be32_to_cpu(super->active->config_section_length); + confsec += super->conf_rec_len) { struct vd_config *vd = - (struct vd_config *)((char*)conf + i*512); + (struct vd_config *)((char*)conf + confsec*512); struct vcl *vcl; if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) { if (dl->spare) continue; - dl->spare = malloc(super->conf_rec_len*512); + if (posix_memalign((void**)&dl->spare, 512, + super->conf_rec_len*512) != 0) { + fprintf(stderr, Name + ": %s could not allocate spare info buf\n", + __func__); + return 1; + } + memcpy(dl->spare, vd, super->conf_rec_len*512); continue; } @@ -689,16 +728,30 @@ static int load_ddf_local(int fd, struct ddf_super *super, if (__be32_to_cpu(vd->seqnum) <= __be32_to_cpu(vcl->conf.seqnum)) continue; - } else { - vcl = malloc(super->conf_rec_len*512 + - offsetof(struct vcl, conf)); + } else { + if (posix_memalign((void**)&vcl, 512, + (super->conf_rec_len*512 + + offsetof(struct vcl, conf))) != 0) { + fprintf(stderr, Name + ": %s could not allocate vcl buf\n", + __func__); + return 1; + } vcl->next = super->conflist; + vcl->block_sizes = NULL; /* FIXME not for CONCAT */ super->conflist = vcl; dl->vlist[vnum++] = vcl; } memcpy(&vcl->conf, vd, super->conf_rec_len*512); vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[super->mppe]; + + for (i=0; i < max_virt_disks ; i++) + if (memcmp(super->virt->entries[i].guid, + vcl->conf.guid, DDF_GUID_LEN)==0) + break; + if (i < max_virt_disks) + vcl->vcnum = i; } free(conf); @@ -709,6 +762,9 @@ static int load_ddf_local(int fd, struct ddf_super *super, static int load_super_ddf_all(struct supertype *st, int fd, void **sbp, char *devname, int keep_fd); #endif + +static void free_super_ddf(struct supertype *st); + static int load_super_ddf(struct supertype *st, int fd, char *devname) { @@ -717,35 +773,37 @@ static int load_super_ddf(struct supertype *st, int fd, int rv; #ifndef MDASSEMBLE + /* if 'fd' is a container, load metadata from all the devices */ if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0) return 0; #endif + if (st->subarray[0]) + return 1; /* FIXME Is this correct */ if (get_dev_size(fd, devname, &dsize) == 0) return 1; /* 32M is a lower bound */ if (dsize <= 32*1024*1024) { - if (devname) { + if (devname) fprintf(stderr, Name ": %s is too small for ddf: " "size is %llu sectors.\n", devname, dsize>>9); - return 1; - } + return 1; } if (dsize & 511) { - if (devname) { + if (devname) fprintf(stderr, Name ": %s is an odd size for ddf: " "size is %llu bytes.\n", devname, dsize); - return 1; - } + return 1; } - super = malloc(sizeof(*super)); - if (!super) { + free_super_ddf(st); + + if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) { fprintf(stderr, Name ": malloc of %zu failed.\n", sizeof(*super)); return 1; @@ -771,7 +829,28 @@ static int load_super_ddf(struct supertype *st, int fd, return rv; } - load_ddf_local(fd, super, devname, 0); + rv = load_ddf_local(fd, super, devname, 0); + + if (rv) { + if (devname) + fprintf(stderr, + Name ": Failed to load all information " + "sections on %s\n", devname); + free(super); + return rv; + } + + if (st->subarray[0]) { + struct vcl *v; + + for (v = super->conflist; v; v = v->next) + if (v->vcnum == atoi(st->subarray)) + super->currentconf = v; + if (!super->currentconf) { + free(super); + return 1; + } + } /* Should possibly check the sections .... */ @@ -781,6 +860,7 @@ static int load_super_ddf(struct supertype *st, int fd, st->minor_version = 0; st->max_devs = 512; } + st->loaded_container = 0; return 0; } @@ -795,6 +875,8 @@ static void free_super_ddf(struct supertype *st) while (ddf->conflist) { struct vcl *v = ddf->conflist; ddf->conflist = v->next; + if (v->block_sizes) + free(v->block_sizes); free(v); } while (ddf->dlist) { @@ -820,6 +902,7 @@ static struct supertype *match_metadata_desc_ddf(char *arg) return NULL; st = malloc(sizeof(*st)); + memset(st, 0, sizeof(*st)); st->ss = &super_ddf; st->max_devs = 512; st->minor_version = 0; @@ -827,38 +910,6 @@ static struct supertype *match_metadata_desc_ddf(char *arg) return st; } -static struct supertype *match_metadata_desc_ddf_bvd(char *arg) -{ - struct supertype *st; - if (strcmp(arg, "ddf/bvd") != 0 && - strcmp(arg, "bvd") != 0 && - strcmp(arg, "default") != 0 - ) - return NULL; - - st = malloc(sizeof(*st)); - st->ss = &super_ddf_bvd; - st->max_devs = 512; - st->minor_version = 0; - st->sb = NULL; - return st; -} -static struct supertype *match_metadata_desc_ddf_svd(char *arg) -{ - struct supertype *st; - if (strcmp(arg, "ddf/svd") != 0 && - strcmp(arg, "svd") != 0 && - strcmp(arg, "default") != 0 - ) - return NULL; - - st = malloc(sizeof(*st)); - st->ss = &super_ddf_svd; - st->max_devs = 512; - st->minor_version = 0; - st->sb = NULL; - return st; -} #ifndef MDASSEMBLE @@ -939,34 +990,50 @@ static int map_num1(struct num_mapping *map, int num) return map[i].num2; } +static int all_ff(char *guid) +{ + int i; + for (i = 0; i < DDF_GUID_LEN; i++) + if (guid[i] != (char)0xff) + return 0; + return 1; +} + #ifndef MDASSEMBLE static void print_guid(char *guid, int tstamp) { /* A GUIDs are part (or all) ASCII and part binary. * They tend to be space padded. - * We ignore trailing spaces and print numbers - * <0x20 and >=0x7f as \xXX - * Some GUIDs have a time stamp in bytes 16-19. - * We print that if appropriate + * We print the GUID in HEX, then in parentheses add + * any initial ASCII sequence, and a possible + * time stamp from bytes 16-19 */ int l = DDF_GUID_LEN; int i; + + for (i=0 ; i= 0x20 && guid[i] < 0x7f) fputc(guid[i], stdout); else - fprintf(stdout, "\\x%02x", guid[i]&255); + break; } if (tstamp) { time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE; char tbuf[100]; struct tm *tm; tm = localtime(&then); - strftime(tbuf, 100, " (%D %T)",tm); + strftime(tbuf, 100, " %D %T",tm); fputs(tbuf, stdout); } + printf(")"); } static void examine_vd(int n, struct ddf_super *sb, char *guid) @@ -975,6 +1042,7 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) struct vcl *vcl; for (vcl = sb->conflist ; vcl ; vcl = vcl->next) { + int i; struct vd_config *vc = &vcl->conf; if (calc_crc(vc, crl*512) != vc->crc) @@ -983,8 +1051,22 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) continue; /* Ok, we know about this VD, let's give more details */ - printf(" Raid Devices[%d] : %d\n", n, + printf(" Raid Devices[%d] : %d (", n, __be16_to_cpu(vc->prim_elmnt_count)); + for (i=0; i<__be16_to_cpu(vc->prim_elmnt_count); i++) { + int j; + int cnt = __be16_to_cpu(sb->phys->used_pdes); + for (j=0; jphys_refnum[i] == sb->phys->entries[j].refnum) + break; + if (i) printf(" "); + if (j < cnt) + printf("%d", j); + else + printf("--"); + } + printf(")\n"); + if (vc->chunk_shift != 255) printf(" Chunk Size[%d] : %d sectors\n", n, 1 << vc->chunk_shift); printf(" Raid Level[%d] : %s\n", n, @@ -996,9 +1078,9 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) map_num(ddf_sec_level, vc->srl) ?: "-unknown-"); } printf(" Device Size[%d] : %llu\n", n, - __be64_to_cpu(vc->blocks)/2); + (unsigned long long)__be64_to_cpu(vc->blocks)/2); printf(" Array Size[%d] : %llu\n", n, - __be64_to_cpu(vc->array_blocks)/2); + (unsigned long long)__be64_to_cpu(vc->array_blocks)/2); } } @@ -1010,6 +1092,7 @@ static void examine_vds(struct ddf_super *sb) for (i=0; ivirt->entries[i]; + printf("\n"); printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1); printf("\n"); printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit)); @@ -1033,40 +1116,44 @@ static void examine_pds(struct ddf_super *sb) int i; struct dl *dl; printf(" Physical Disks : %d\n", cnt); + printf(" Number RefNo Size Device Type/State\n"); for (i=0 ; iphys->entries[i]; int type = __be16_to_cpu(pd->type); int state = __be16_to_cpu(pd->state); - printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0); - printf("\n"); - printf(" ref[%d] : %08x\n", i, + //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0); + //printf("\n"); + printf(" %3d %08x ", i, __be32_to_cpu(pd->refnum)); - printf(" mode[%d] : %s%s%s%s%s\n", i, + printf("%8lluK ", + (unsigned long long)__be64_to_cpu(pd->config_size)>>1); + for (dl = sb->dlist; dl ; dl = dl->next) { + if (dl->disk.refnum == pd->refnum) { + char *dv = map_dev(dl->major, dl->minor, 0); + if (dv) { + printf("%-15s", dv); + break; + } + } + } + if (!dl) + printf("%15s",""); + printf(" %s%s%s%s%s", (type&2) ? "active":"", - (type&4) ? "Global Spare":"", + (type&4) ? "Global-Spare":"", (type&8) ? "spare" : "", (type&16)? ", foreign" : "", (type&32)? "pass-through" : ""); - printf(" state[%d] : %s%s%s%s%s%s%s\n", i, + printf("/%s%s%s%s%s%s%s", (state&1)? "Online": "Offline", (state&2)? ", Failed": "", (state&4)? ", Rebuilding": "", (state&8)? ", in-transition": "", - (state&16)? ", SMART errors": "", - (state&32)? ", Unrecovered Read Errors": "", + (state&16)? ", SMART-errors": "", + (state&32)? ", Unrecovered-Read-Errors": "", (state&64)? ", Missing" : ""); - printf(" Avail Size[%d] : %llu K\n", i, - __be64_to_cpu(pd->config_size)>>1); - for (dl = sb->dlist; dl ; dl = dl->next) { - if (dl->disk.refnum == pd->refnum) { - char *dv = map_dev(dl->major, dl->minor, 0); - if (dv) - printf(" Device[%d] : %s\n", - i, dv); - } - } printf("\n"); } } @@ -1088,22 +1175,60 @@ static void examine_super_ddf(struct supertype *st, char *homehost) examine_pds(sb); } -static void brief_examine_super_ddf(struct supertype *st) +static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info); + +static void uuid_from_super_ddf(struct supertype *st, int uuid[4]); + +static void brief_examine_super_ddf(struct supertype *st, int verbose) +{ + /* We just write a generic DDF ARRAY entry + */ + struct mdinfo info; + char nbuf[64]; + getinfo_super_ddf(st, &info); + fname_from_uuid(st, &info, nbuf, ':'); + + printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5); +} + +static void brief_examine_subarrays_ddf(struct supertype *st, int verbose) { /* We just write a generic DDF ARRAY entry - * The uuid is all hex, 6 groups of 4 bytes */ struct ddf_super *ddf = st->sb; + struct mdinfo info; int i; - printf("ARRAY /dev/ddf UUID="); - for (i = 0; i < DDF_GUID_LEN; i++) { - printf("%02x", ddf->anchor.guid[i]); - if ((i&3) == 0 && i != 0) - printf(":"); + char nbuf[64]; + getinfo_super_ddf(st, &info); + fname_from_uuid(st, &info, nbuf, ':'); + + for (i=0; i<__be16_to_cpu(ddf->virt->max_vdes); i++) { + struct virtual_entry *ve = &ddf->virt->entries[i]; + struct vcl vcl; + char nbuf1[64]; + if (all_ff(ve->guid)) + continue; + memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN); + ddf->currentconf =&vcl; + uuid_from_super_ddf(st, info.uuid); + fname_from_uuid(st, &info, nbuf1, ':'); + printf("ARRAY container=%s member=%d UUID=%s\n", + nbuf+5, i, nbuf1+5); } - printf("\n"); } +static void export_examine_super_ddf(struct supertype *st) +{ + struct mdinfo info; + char nbuf[64]; + getinfo_super_ddf(st, &info); + fname_from_uuid(st, &info, nbuf, ':'); + printf("MD_METADATA=ddf\n"); + printf("MD_LEVEL=container\n"); + printf("MD_UUID=%s\n", nbuf+5); +} + + static void detail_super_ddf(struct supertype *st, char *homehost) { /* FIXME later @@ -1120,9 +1245,12 @@ static void brief_detail_super_ddf(struct supertype *st) * Can that be stored in ddf_super?? */ // struct ddf_super *ddf = st->sb; + struct mdinfo info; + char nbuf[64]; + getinfo_super_ddf(st, &info); + fname_from_uuid(st, &info, nbuf,':'); + printf(" UUID=%s", nbuf + 5); } - - #endif static int match_home_ddf(struct supertype *st, char *homehost) @@ -1140,18 +1268,17 @@ static int match_home_ddf(struct supertype *st, char *homehost) ddf->controller.vendor_data[len] == 0); } +#ifndef MDASSEMBLE static struct vd_config *find_vdcr(struct ddf_super *ddf, int inst) { struct vcl *v; - if (inst < 0 || inst > __be16_to_cpu(ddf->virt->populated_vdes)) - return NULL; + for (v = ddf->conflist; v; v = v->next) - if (memcmp(v->conf.guid, - ddf->virt->entries[inst].guid, - DDF_GUID_LEN) == 0) + if (inst == v->vcnum) return &v->conf; return NULL; } +#endif static int find_phys(struct ddf_super *ddf, __u32 phys_refnum) { @@ -1171,11 +1298,14 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4]) * uuid to put into bitmap file (Create, Grow) * uuid for backup header when saving critical section (Grow) * comparing uuids when re-adding a device into an array + * In these cases the uuid required is that of the data-array, + * not the device-set. + * uuid to recognise same set when adding a missing device back + * to an array. This is a uuid for the device-set. + * * For each of these we can make do with a truncated * or hashed uuid rather than the original, as long as * everyone agrees. - * In each case the uuid required is that of the data-array, - * not the device-set. * In the case of SVD we assume the BVD is of interest, * though that might be the case if a bitmap were made for * a mirrored SVD - worry about that later. @@ -1184,30 +1314,33 @@ static void uuid_from_super_ddf(struct supertype *st, int uuid[4]) * The first 16 bytes of the sha1 of these is used. */ struct ddf_super *ddf = st->sb; - struct vd_config *vd = find_vdcr(ddf, ddf->conf_num); - - if (!vd) - memset(uuid, 0, sizeof (uuid)); - else { - char buf[20]; - struct sha1_ctx ctx; - sha1_init_ctx(&ctx); - sha1_process_bytes(&vd->guid, DDF_GUID_LEN, &ctx); - if (vd->sec_elmnt_count > 1) - sha1_process_bytes(&vd->sec_elmnt_seq, 1, &ctx); - sha1_finish_ctx(&ctx, buf); - memcpy(uuid, buf, sizeof(uuid)); - } + struct vcl *vcl = ddf->currentconf; + char *guid; + char buf[20]; + struct sha1_ctx ctx; + + if (vcl) + guid = vcl->conf.guid; + else + guid = ddf->anchor.guid; + + sha1_init_ctx(&ctx); + sha1_process_bytes(guid, DDF_GUID_LEN, &ctx); + sha1_finish_ctx(&ctx, buf); + memcpy(uuid, buf, 4*4); } +static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info); + static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info) { struct ddf_super *ddf = st->sb; - int i; - info->array.major_version = 1000; - info->array.minor_version = 0; /* FIXME use ddf->revision somehow */ - info->array.patch_version = 0; + if (ddf->currentconf) { + getinfo_super_ddf_bvd(st, info); + return; + } + info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes); info->array.level = LEVEL_CONTAINER; info->array.layout = 0; @@ -1217,43 +1350,35 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info) info->array.utime = 0; info->array.chunk_size = 0; -// info->data_offset = ???; -// info->component_size = ???; info->disk.major = 0; info->disk.minor = 0; if (ddf->dlist) { info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum); - info->disk.raid_disk = -1; - for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes) ; i++) - if (ddf->phys->entries[i].refnum == - ddf->dlist->disk.refnum) { - info->disk.raid_disk = i; - break; - } + info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum); + + info->data_offset = __be64_to_cpu(ddf->phys-> + entries[info->disk.raid_disk]. + config_size); + info->component_size = ddf->dlist->size - info->data_offset; } else { info->disk.number = -1; + info->disk.raid_disk = -1; // info->disk.raid_disk = find refnum in the table and use index; } - info->disk.state = (1 << MD_DISK_SYNC); + info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE); + info->reshape_active = 0; + info->name[0] = 0; + info->array.major_version = -1; + info->array.minor_version = -2; strcpy(info->text_version, "ddf"); + info->safe_mode_delay = 0; -// uuid_from_super_ddf(info->uuid, sbv); - -// info->name[] ?? ; -} - -static void getinfo_super_n_container(struct supertype *st, struct mdinfo *info) -{ - /* just need offset and size */ - struct ddf_super *ddf = st->sb; - int n = info->disk.number; + uuid_from_super_ddf(st, info->uuid); - info->data_offset = __be64_to_cpu(ddf->phys->entries[n].config_size); - info->component_size = 32*1024*1024 / 512; } static int rlq_to_layout(int rlq, int prl, int raiddisks); @@ -1261,59 +1386,72 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks); static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info) { struct ddf_super *ddf = st->sb; - struct vd_config *vd = find_vdcr(ddf, info->container_member); + struct vcl *vc = ddf->currentconf; + int cd = ddf->currentdev; + int j; + struct dl *dl; /* FIXME this returns BVD info - what if we want SVD ?? */ - info->array.major_version = 1000; - info->array.minor_version = 0; /* FIXME use ddf->revision somehow */ - info->array.patch_version = 0; - info->array.raid_disks = __be16_to_cpu(vd->prim_elmnt_count); - info->array.level = map_num1(ddf_level_num, vd->prl); - info->array.layout = rlq_to_layout(vd->rlq, vd->prl, + info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count); + info->array.level = map_num1(ddf_level_num, vc->conf.prl); + info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl, info->array.raid_disks); info->array.md_minor = -1; - info->array.ctime = DECADE + __be32_to_cpu(*(__u32*)(vd->guid+16)); - info->array.utime = DECADE + __be32_to_cpu(vd->timestamp); - info->array.chunk_size = 512 << vd->chunk_shift; - -// info->data_offset = ???; -// info->component_size = ???; + info->array.ctime = DECADE + + __be32_to_cpu(*(__u32*)(vc->conf.guid+16)); + info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp); + info->array.chunk_size = 512 << vc->conf.chunk_shift; + info->custom_array_size = 0; + + if (cd >= 0 && cd < ddf->mppe) { + info->data_offset = __be64_to_cpu(vc->lba_offset[cd]); + if (vc->block_sizes) + info->component_size = vc->block_sizes[cd]; + else + info->component_size = __be64_to_cpu(vc->conf.blocks); + } + for (dl = ddf->dlist; dl ; dl = dl->next) + if (dl->raiddisk == info->disk.raid_disk) + break; info->disk.major = 0; info->disk.minor = 0; + if (dl) { + info->disk.major = dl->major; + info->disk.minor = dl->minor; + } // info->disk.number = __be32_to_cpu(ddf->disk.refnum); // info->disk.raid_disk = find refnum in the table and use index; // info->disk.state = ???; + info->container_member = ddf->currentconf->vcnum; + info->resync_start = 0; if (!(ddf->virt->entries[info->container_member].state & DDF_state_inconsistent) && (ddf->virt->entries[info->container_member].init_state & DDF_initstate_mask) == DDF_init_full) - info->resync_start = ~0ULL; + info->resync_start = MaxSector; uuid_from_super_ddf(st, info->uuid); - sprintf(info->text_version, "/%s/%d", + info->container_member = atoi(st->subarray); + info->array.major_version = -1; + info->array.minor_version = -2; + sprintf(info->text_version, "/%s/%s", devnum2devname(st->container_dev), - info->container_member); - -// info->name[] ?? ; + st->subarray); + info->safe_mode_delay = 200; + + memcpy(info->name, ddf->virt->entries[info->container_member].name, 16); + info->name[16]=0; + for(j=0; j<16; j++) + if (info->name[j] == ' ') + info->name[j] = 0; } -static void getinfo_super_n_bvd(struct supertype *st, struct mdinfo *info) -{ - /* Find the particular details for info->disk.raid_disk. - * This includes data_offset, component_size, - */ - struct ddf_super *ddf = st->sb; - __u64 *lba_offset = ddf->newconf->lba_offset; - struct vd_config *conf = &ddf->newconf->conf; - info->data_offset = __be64_to_cpu(lba_offset[info->disk.raid_disk]); - info->component_size = __be64_to_cpu(conf->blocks); -} static int update_super_ddf(struct supertype *st, struct mdinfo *info, char *update, @@ -1334,7 +1472,7 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info, * grow: Array has gained a new device - this is currently for * linear only * resync: mark as dirty so a resync will happen. - * uuid: Change the uuid of the array to match watch is given + * uuid: Change the uuid of the array to match what is given * homehost: update the recorded homehost * name: update the name - preserving the homehost * _reshape_progress: record new reshape_progress position. @@ -1349,7 +1487,6 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info, // struct vd_config *vd = find_vdcr(ddf, info->container_member); // struct virtual_entry *ve = find_ve(ddf); - /* we don't need to handle "force-*" or "assemble" as * there is no need to 'trick' the kernel. We the metadata is * first updated to activate the array, all the implied modifications @@ -1389,7 +1526,6 @@ static int update_super_ddf(struct supertype *st, struct mdinfo *info, static void make_header_guid(char *guid) { __u32 stamp; - int rfd; /* Create a DDF Header of Virtual Disk GUID */ /* 24 bytes of fiction required. @@ -1404,12 +1540,16 @@ static void make_header_guid(char *guid) memcpy(guid+12, &stamp, 4); stamp = __cpu_to_be32(time(0) - DECADE); memcpy(guid+16, &stamp, 4); - rfd = open("/dev/urandom", O_RDONLY); - if (rfd < 0 || read(rfd, &stamp, 4) != 4) - stamp = random(); + stamp = random32(); memcpy(guid+20, &stamp, 4); - if (rfd >= 0) close(rfd); } + +static int init_super_ddf_bvd(struct supertype *st, + mdu_array_info_t *info, + unsigned long long size, + char *name, char *homehost, + int *uuid); + static int init_super_ddf(struct supertype *st, mdu_array_info_t *info, unsigned long long size, char *name, char *homehost, @@ -1449,9 +1589,22 @@ static int init_super_ddf(struct supertype *st, struct phys_disk *pd; struct virtual_disk *vd; - ddf = malloc(sizeof(*ddf)); + if (st->sb) + return init_super_ddf_bvd(st, info, size, name, homehost, uuid); + + if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) { + fprintf(stderr, Name ": %s could not allocate superblock\n", __func__); + return 0; + } + memset(ddf, 0, sizeof(*ddf)); ddf->dlist = NULL; /* no physical disks yet */ ddf->conflist = NULL; /* No virtual disks yet */ + st->sb = ddf; + + if (info == NULL) { + /* zeroing superblock */ + return 0; + } /* At least 32MB *must* be reserved for the ddf. So let's just * start 32MB from the end, and put the primary header there. @@ -1464,7 +1617,7 @@ static int init_super_ddf(struct supertype *st, ddf->anchor.magic = DDF_HEADER_MAGIC; make_header_guid(ddf->anchor.guid); - memcpy(ddf->anchor.revision, DDF_REVISION, 8); + memcpy(ddf->anchor.revision, DDF_REVISION_2, 8); ddf->anchor.seq = __cpu_to_be32(1); ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE); ddf->anchor.openflag = 0xFF; @@ -1486,12 +1639,11 @@ static int init_super_ddf(struct supertype *st, ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */ ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */ ddf->max_part = 64; - ddf->conf_rec_len = 1 + 256 * 12 / 512; - ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len); - ddf->anchor.max_primary_element_entries = __cpu_to_be16(256); ddf->mppe = 256; + ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512; + ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len); + ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe); memset(ddf->anchor.pad3, 0xff, 54); - /* controller sections is one sector long immediately * after the ddf header */ sector = 1; @@ -1525,7 +1677,7 @@ static int init_super_ddf(struct supertype *st, __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */ sector += vdsize/512; - clen = (1 + 256*12/512) * (64+1); + clen = ddf->conf_rec_len * (ddf->max_part+1); ddf->anchor.config_section_offset = __cpu_to_be32(sector); ddf->anchor.config_section_length = __cpu_to_be32(clen); sector += clen; @@ -1575,8 +1727,14 @@ static int init_super_ddf(struct supertype *st, memcpy(ddf->controller.product_id, "What Is My PID??", 16); memset(ddf->controller.pad, 0xff, 8); memset(ddf->controller.vendor_data, 0xff, 448); + if (homehost && strlen(homehost) < 440) + strcpy((char*)ddf->controller.vendor_data, homehost); - pd = ddf->phys = malloc(pdsize); + if (posix_memalign((void**)&pd, 512, pdsize) != 0) { + fprintf(stderr, Name ": %s could not allocate pd\n", __func__); + return 0; + } + ddf->phys = pd; ddf->pdsize = pdsize; memset(pd, 0xff, pdsize); @@ -1586,7 +1744,11 @@ static int init_super_ddf(struct supertype *st, pd->max_pdes = __cpu_to_be16(max_phys_disks); memset(pd->pad, 0xff, 52); - vd = ddf->virt = malloc(vdsize); + if (posix_memalign((void**)&vd, 512, vdsize) != 0) { + fprintf(stderr, Name ": %s could not allocate vd\n", __func__); + return 0; + } + ddf->virt = vd; ddf->vdsize = vdsize; memset(vd, 0, vdsize); vd->magic = DDF_VIRT_RECORDS_MAGIC; @@ -1598,17 +1760,10 @@ static int init_super_ddf(struct supertype *st, memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry)); st->sb = ddf; + ddf->updates_pending = 1; return 1; } -static int all_ff(char *guid) -{ - int i; - for (i = 0; i < DDF_GUID_LEN; i++) - if (guid[i] != (char)0xff) - return 0; - return 1; -} static int chunk_to_shift(int chunksize) { return ffs(chunksize/512)-1; @@ -1643,7 +1798,6 @@ static int layout_to_rlq(int level, int layout, int raiddisks) } break; case 5: - case 6: switch(layout) { case ALGORITHM_LEFT_ASYMMETRIC: return DDF_RAID5_N_RESTART; @@ -1654,6 +1808,15 @@ static int layout_to_rlq(int level, int layout, int raiddisks) case ALGORITHM_RIGHT_SYMMETRIC: return -1; /* not mentioned in standard */ } + case 6: + switch(layout) { + case ALGORITHM_ROTATING_N_RESTART: + return DDF_RAID5_N_RESTART; + case ALGORITHM_ROTATING_ZERO_RESTART: + return DDF_RAID6_0_RESTART; + case ALGORITHM_ROTATING_N_CONTINUE: + return DDF_RAID5_N_CONTINUE; + } } return -1; } @@ -1675,7 +1838,6 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks) return -1; /* FIXME this isn't checked */ } case DDF_RAID5: - case DDF_RAID6: switch(rlq) { case DDF_RAID5_N_RESTART: return ALGORITHM_LEFT_ASYMMETRIC; @@ -1686,10 +1848,74 @@ static int rlq_to_layout(int rlq, int prl, int raiddisks) default: return -1; } + case DDF_RAID6: + switch(rlq) { + case DDF_RAID5_N_RESTART: + return ALGORITHM_ROTATING_N_RESTART; + case DDF_RAID6_0_RESTART: + return ALGORITHM_ROTATING_ZERO_RESTART; + case DDF_RAID5_N_CONTINUE: + return ALGORITHM_ROTATING_N_CONTINUE; + default: + return -1; + } } return -1; } +#ifndef MDASSEMBLE +struct extent { + unsigned long long start, size; +}; +static int cmp_extent(const void *av, const void *bv) +{ + const struct extent *a = av; + const struct extent *b = bv; + if (a->start < b->start) + return -1; + if (a->start > b->start) + return 1; + return 0; +} + +static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl) +{ + /* find a list of used extents on the give physical device + * (dnum) of the given ddf. + * Return a malloced array of 'struct extent' + +FIXME ignore DDF_Legacy devices? + + */ + struct extent *rv; + int n = 0; + int i, j; + + rv = malloc(sizeof(struct extent) * (ddf->max_part + 2)); + if (!rv) + return NULL; + + for (i = 0; i < ddf->max_part; i++) { + struct vcl *v = dl->vlist[i]; + if (v == NULL) + continue; + for (j=0; j < v->conf.prim_elmnt_count; j++) + if (v->conf.phys_refnum[j] == dl->disk.refnum) { + /* This device plays role 'j' in 'v'. */ + rv[n].start = __be64_to_cpu(v->lba_offset[j]); + rv[n].size = __be64_to_cpu(v->conf.blocks); + n++; + break; + } + } + qsort(rv, n, sizeof(*rv), cmp_extent); + + rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size); + rv[n].size = 0; + return rv; +} +#endif + static int init_super_ddf_bvd(struct supertype *st, mdu_array_info_t *info, unsigned long long size, @@ -1723,7 +1949,6 @@ static int init_super_ddf_bvd(struct supertype *st, return 0; } ve = &ddf->virt->entries[venum]; - ddf->conf_num = venum; /* A Virtual Disk GUID contains the T10 Vendor ID, controller type, * timestamp, random number @@ -1747,8 +1972,15 @@ static int init_super_ddf_bvd(struct supertype *st, __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1); /* Now create a new vd_config */ - vcl = malloc(offsetof(struct vcl, conf) + ddf->conf_rec_len * 512); + if (posix_memalign((void**)&vcl, 512, + (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) { + fprintf(stderr, Name ": %s could not allocate vd_config\n", __func__); + return 0; + } vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe]; + vcl->vcnum = venum; + sprintf(st->subarray, "%d", venum); + vcl->block_sizes = NULL; /* FIXME not for CONCAT */ vc = &vcl->conf; @@ -1789,14 +2021,16 @@ static int init_super_ddf_bvd(struct supertype *st, memset(vc->vendor, 0xff, 32); memset(vc->phys_refnum, 0xff, 4*ddf->mppe); - memset(vc->phys_refnum+(ddf->mppe * 4), 0x00, 8*ddf->mppe); + memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe); vcl->next = ddf->conflist; ddf->conflist = vcl; - ddf->newconf = vcl; + ddf->currentconf = vcl; + ddf->updates_pending = 1; return 1; } +#ifndef MDASSEMBLE static void add_to_super_ddf_bvd(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -1806,6 +2040,9 @@ static void add_to_super_ddf_bvd(struct supertype *st, * the phys_refnum and lba_offset for the newly created vd_config. * We might also want to update the type in the phys_disk * section. + * + * Alternately: fd == -1 and we have already chosen which device to + * use and recorded in dlist->raid_disk; */ struct dl *dl; struct ddf_super *ddf = st->sb; @@ -1813,29 +2050,61 @@ static void add_to_super_ddf_bvd(struct supertype *st, __u64 *lba_offset; int working; int i; - int max_virt_disks; + unsigned long long blocks, pos, esize; + struct extent *ex; - for (dl = ddf->dlist; dl ; dl = dl->next) - if (dl->major == dk->major && - dl->minor == dk->minor) - break; + if (fd == -1) { + for (dl = ddf->dlist; dl ; dl = dl->next) + if (dl->raiddisk == dk->raid_disk) + break; + } else { + for (dl = ddf->dlist; dl ; dl = dl->next) + if (dl->major == dk->major && + dl->minor == dk->minor) + break; + } if (!dl || ! (dk->state & (1<newconf->conf; - lba_offset = ddf->newconf->lba_offset; - vc->phys_refnum[dk->raid_disk] = dl->disk.refnum; - lba_offset[dk->raid_disk] = 0; /* FIXME */ + vc = &ddf->currentconf->conf; + lba_offset = ddf->currentconf->lba_offset; - for (i=0; i < ddf->max_part ; i++) - if (dl->vlist[i] == NULL) + ex = get_extents(ddf, dl); + if (!ex) + return; + + i = 0; pos = 0; + blocks = __be64_to_cpu(vc->blocks); + if (ddf->currentconf->block_sizes) + blocks = ddf->currentconf->block_sizes[dk->raid_disk]; + + do { + esize = ex[i].start - pos; + if (esize >= blocks) + break; + pos = ex[i].start + ex[i].size; + i++; + } while (ex[i-1].size); + + free(ex); + if (esize < blocks) + return; + + ddf->currentdev = dk->raid_disk; + vc->phys_refnum[dk->raid_disk] = dl->disk.refnum; + lba_offset[dk->raid_disk] = __cpu_to_be64(pos); + + for (i=0; i < ddf->max_part ; i++) + if (dl->vlist[i] == NULL) break; if (i == ddf->max_part) return; - dl->vlist[i] = ddf->newconf; + dl->vlist[i] = ddf->currentconf; - dl->fd = fd; - dl->devname = devname; + if (fd >= 0) + dl->fd = fd; + if (devname) + dl->devname = devname; /* Check how many working raid_disks, and if we can mark * array as optimal yet @@ -1845,14 +2114,9 @@ static void add_to_super_ddf_bvd(struct supertype *st, for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) if (vc->phys_refnum[i] != 0xffffffff) working++; + /* Find which virtual_entry */ - max_virt_disks = __be16_to_cpu(ddf->active->max_vd_entries); - for (i=0; i < max_virt_disks ; i++) - if (memcmp(ddf->virt->entries[i].guid, - vc->guid, DDF_GUID_LEN)==0) - break; - if (i == max_virt_disks) - return; + i = ddf->currentconf->vcnum; if (working == __be16_to_cpu(vc->prim_elmnt_count)) ddf->virt->entries[i].state = (ddf->virt->entries[i].state & ~DDF_state_mask) @@ -1866,12 +2130,13 @@ static void add_to_super_ddf_bvd(struct supertype *st, ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare); ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD); + ddf->updates_pending = 1; } /* add a device to a container, either while creating it or while * expanding a pre-existing container */ -static void add_to_super_ddf(struct supertype *st, +static int add_to_super_ddf(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { struct ddf_super *ddf = st->sb; @@ -1883,15 +2148,25 @@ static void add_to_super_ddf(struct supertype *st, int n, i; struct stat stb; + if (ddf->currentconf) { + add_to_super_ddf_bvd(st, dk, fd, devname); + return 0; + } + /* This is device numbered dk->number. We need to create * a phys_disk entry and a more detailed disk_data entry. */ fstat(fd, &stb); - dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part); + if (posix_memalign((void**)&dd, 512, + sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) { + fprintf(stderr, Name + ": %s could allocate buffer for new disk, aborting\n", + __func__); + return 1; + } dd->major = major(stb.st_rdev); dd->minor = minor(stb.st_rdev); dd->devname = devname; - dd->next = ddf->dlist; dd->fd = fd; dd->spare = NULL; @@ -1900,10 +2175,18 @@ static void add_to_super_ddf(struct supertype *st, tm = localtime(&now); sprintf(dd->disk.guid, "%8s%04d%02d%02d", T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday); - *(__u32*)(dd->disk.guid + 16) = random(); - *(__u32*)(dd->disk.guid + 20) = random(); + *(__u32*)(dd->disk.guid + 16) = random32(); + *(__u32*)(dd->disk.guid + 20) = random32(); + + do { + /* Cannot be bothered finding a CRC of some irrelevant details*/ + dd->disk.refnum = random32(); + for (i = __be16_to_cpu(ddf->active->max_pd_entries) - 1; + i >= 0; i--) + if (ddf->phys->entries[i].refnum == dd->disk.refnum) + break; + } while (i >= 0); - dd->disk.refnum = random(); /* and hope for the best FIXME check this is unique!!*/ dd->disk.forced_ref = 1; dd->disk.forced_guid = 1; memset(dd->disk.vendor, ' ', 32); @@ -1916,8 +2199,20 @@ static void add_to_super_ddf(struct supertype *st, pde = &ddf->phys->entries[n]; dd->pdnum = n; - n++; - ddf->phys->used_pdes = __cpu_to_be16(n); + if (st->update_tail) { + int len = (sizeof(struct phys_disk) + + sizeof(struct phys_disk_entry)); + struct phys_disk *pd; + + pd = malloc(len); + pd->magic = DDF_PHYS_RECORDS_MAGIC; + pd->used_pdes = __cpu_to_be16(n); + pde = &pd->entries[0]; + dd->mdupdate = pd; + } else { + n++; + ddf->phys->used_pdes = __cpu_to_be16(n); + } memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN); pde->refnum = dd->disk.refnum; @@ -1929,7 +2224,17 @@ static void add_to_super_ddf(struct supertype *st, sprintf(pde->path, "%17.17s","Information: nil") ; memset(pde->pad, 0xff, 6); - ddf->dlist = dd; + dd->size = size >> 9; + if (st->update_tail) { + dd->next = ddf->add_list; + ddf->add_list = dd; + } else { + dd->next = ddf->dlist; + ddf->dlist = dd; + ddf->updates_pending = 1; + } + + return 0; } /* @@ -1938,7 +2243,8 @@ static void add_to_super_ddf(struct supertype *st, * container. */ -#ifndef MDASSEMBLE +static unsigned char null_conf[4096+512]; + static int __write_init_super_ddf(struct supertype *st, int do_close) { @@ -1947,15 +2253,20 @@ static int __write_init_super_ddf(struct supertype *st, int do_close) struct dl *d; int n_config; int conf_size; - + int attempts = 0; + int successes = 0; unsigned long long size, sector; + /* try to write updated metadata, + * if we catch a failure move on to the next disk + */ for (d = ddf->dlist; d; d=d->next) { int fd = d->fd; if (fd < 0) continue; + attempts++; /* We need to fill in the primary, (secondary) and workspace * lba's in the headers, set their checksums, * Also checksum phys, virt.... @@ -1985,17 +2296,21 @@ static int __write_init_super_ddf(struct supertype *st, int do_close) sector = size - 16*1024*2; lseek64(fd, sector<<9, 0); - write(fd, &ddf->primary, 512); + if (write(fd, &ddf->primary, 512) < 0) + continue; ddf->controller.crc = calc_crc(&ddf->controller, 512); - write(fd, &ddf->controller, 512); + if (write(fd, &ddf->controller, 512) < 0) + continue; ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize); - write(fd, ddf->phys, ddf->pdsize); + if (write(fd, ddf->phys, ddf->pdsize) < 0) + continue; ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize); - write(fd, ddf->virt, ddf->vdsize); + if (write(fd, ddf->virt, ddf->vdsize) < 0) + continue; /* Now write lots of config records. */ n_config = ddf->max_part; @@ -2007,31 +2322,92 @@ static int __write_init_super_ddf(struct supertype *st, int do_close) if (c) { c->conf.crc = calc_crc(&c->conf, conf_size); - write(fd, &c->conf, conf_size); + if (write(fd, &c->conf, conf_size) < 0) + break; } else { - __u32 sig = 0xffffffff; - write(fd, &sig, 4); - lseek64(fd, conf_size-4, SEEK_CUR); + char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL); + if (null_conf[0] != 0xff) + memset(null_conf, 0xff, sizeof(null_conf)); + int togo = conf_size; + while (togo > sizeof(null_conf)-512) { + if (write(fd, null_aligned, sizeof(null_conf)-512) < 0) + break; + togo -= sizeof(null_conf)-512; + } + if (write(fd, null_aligned, togo) < 0) + break; } } + if (i <= n_config) + continue; d->disk.crc = calc_crc(&d->disk, 512); - write(fd, &d->disk, 512); + if (write(fd, &d->disk, 512) < 0) + continue; /* Maybe do the same for secondary */ lseek64(fd, (size-1)*512, SEEK_SET); - write(fd, &ddf->anchor, 512); - if (do_close) { - close(fd); + if (write(fd, &ddf->anchor, 512) < 0) + continue; + successes++; + } + + if (do_close) + for (d = ddf->dlist; d; d=d->next) { + close(d->fd); d->fd = -1; } - } - return 1; + + return attempts != successes; } static int write_init_super_ddf(struct supertype *st) { - return __write_init_super_ddf(st, 1); + struct ddf_super *ddf = st->sb; + struct vcl *currentconf = ddf->currentconf; + + /* we are done with currentconf reset it to point st at the container */ + ddf->currentconf = NULL; + + if (st->update_tail) { + /* queue the virtual_disk and vd_config as metadata updates */ + struct virtual_disk *vd; + struct vd_config *vc; + int len; + + if (!currentconf) { + int len = (sizeof(struct phys_disk) + + sizeof(struct phys_disk_entry)); + + /* adding a disk to the container. */ + if (!ddf->add_list) + return 0; + + append_metadata_update(st, ddf->add_list->mdupdate, len); + ddf->add_list->mdupdate = NULL; + return 0; + } + + /* Newly created VD */ + + /* First the virtual disk. We have a slightly fake header */ + len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry); + vd = malloc(len); + *vd = *ddf->virt; + vd->entries[0] = ddf->virt->entries[currentconf->vcnum]; + vd->populated_vdes = __cpu_to_be16(currentconf->vcnum); + append_metadata_update(st, vd, len); + + /* Then the vd_config */ + len = ddf->conf_rec_len * 512; + vc = malloc(len); + memcpy(vc, ¤tconf->conf, len); + append_metadata_update(st, vc, len); + + /* FIXME I need to close the fds! */ + return 0; + } else + return __write_init_super_ddf(st, 1); } #endif @@ -2045,10 +2421,114 @@ static __u64 avail_size_ddf(struct supertype *st, __u64 devsize) } #ifndef MDASSEMBLE -int validate_geometry_ddf(struct supertype *st, - int level, int layout, int raiddisks, - int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) + +static int reserve_space(struct supertype *st, int raiddisks, + unsigned long long size, int chunk, + unsigned long long *freesize) +{ + /* Find 'raiddisks' spare extents at least 'size' big (but + * only caring about multiples of 'chunk') and remember + * them. + * If the cannot be found, fail. + */ + struct dl *dl; + struct ddf_super *ddf = st->sb; + int cnt = 0; + + for (dl = ddf->dlist; dl ; dl=dl->next) { + dl->raiddisk = -1; + dl->esize = 0; + } + /* Now find largest extent on each device */ + for (dl = ddf->dlist ; dl ; dl=dl->next) { + struct extent *e = get_extents(ddf, dl); + unsigned long long pos = 0; + int i = 0; + int found = 0; + unsigned long long minsize = size; + + if (size == 0) + minsize = chunk; + + if (!e) + continue; + do { + unsigned long long esize; + esize = e[i].start - pos; + if (esize >= minsize) { + found = 1; + minsize = esize; + } + pos = e[i].start + e[i].size; + i++; + } while (e[i-1].size); + if (found) { + cnt++; + dl->esize = minsize; + } + free(e); + } + if (cnt < raiddisks) { + fprintf(stderr, Name ": not enough devices with space to create array.\n"); + return 0; /* No enough free spaces large enough */ + } + if (size == 0) { + /* choose the largest size of which there are at least 'raiddisk' */ + for (dl = ddf->dlist ; dl ; dl=dl->next) { + struct dl *dl2; + if (dl->esize <= size) + continue; + /* This is bigger than 'size', see if there are enough */ + cnt = 0; + for (dl2 = dl; dl2 ; dl2=dl2->next) + if (dl2->esize >= dl->esize) + cnt++; + if (cnt >= raiddisks) + size = dl->esize; + } + if (chunk) { + size = size / chunk; + size *= chunk; + } + *freesize = size; + if (size < 32) { + fprintf(stderr, Name ": not enough spare devices to create array.\n"); + return 0; + } + } + /* We have a 'size' of which there are enough spaces. + * We simply do a first-fit */ + cnt = 0; + for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) { + if (dl->esize < size) + continue; + + dl->raiddisk = cnt; + cnt++; + } + return 1; +} + + + +static int +validate_geometry_ddf_container(struct supertype *st, + int level, int layout, int raiddisks, + int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose); + +static int validate_geometry_ddf_bvd(struct supertype *st, + int level, int layout, int raiddisks, + int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose); + +static int validate_geometry_ddf(struct supertype *st, + int level, int layout, int raiddisks, + int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose) { int fd; struct mdinfo *sra; @@ -2062,71 +2542,89 @@ int validate_geometry_ddf(struct supertype *st, */ if (level == LEVEL_CONTAINER) { - st->ss = &super_ddf_container; - if (dev) { - int rv =st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; - } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); + /* Must be a fresh device to add to a container */ + return validate_geometry_ddf_container(st, level, layout, + raiddisks, chunk, + size, dev, freesize, + verbose); } - if (st->sb) { - /* creating in a given container */ - st->ss = &super_ddf_bvd; - if (dev) { - int rv =st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, - size, - NULL, freesize); - if (rv) - return rv; + if (!dev) { + /* Initial sanity check. Exclude illegal levels. */ + int i; + for (i=0; ddf_level_num[i].num1 != MAXINT; i++) + if (ddf_level_num[i].num2 == level) + break; + if (ddf_level_num[i].num1 == MAXINT) { + if (verbose) + fprintf(stderr, Name ": DDF does not support level %d arrays\n", + level); + return 0; + } + /* Should check layout? etc */ + + if (st->sb && freesize) { + /* --create was given a container to create in. + * So we need to check that there are enough + * free spaces and return the amount of space. + * We may as well remember which drives were + * chosen so that add_to_super/getinfo_super + * can return them. + */ + return reserve_space(st, raiddisks, size, chunk, freesize); } - return st->ss->validate_geometry(st, level, layout, raiddisks, - chunk, size, dev, freesize); - } - /* FIXME should exclude MULTIPATH, or more appropriately, allow - * only known levels. - */ - if (!dev) return 1; + } - /* This device needs to be either a device in a 'ddf' container, - * or it needs to be a 'ddf-bvd' array. + if (st->sb) { + /* A container has already been opened, so we are + * creating in there. Maybe a BVD, maybe an SVD. + * Should make a distinction one day. + */ + return validate_geometry_ddf_bvd(st, level, layout, raiddisks, + chunk, size, dev, freesize, + verbose); + } + /* This is the first device for the array. + * If it is a container, we read it in and do automagic allocations, + * no other devices should be given. + * Otherwise it must be a member device of a container, and we + * do manual allocation. + * Later we should check for a BVD and make an SVD. */ - fd = open(dev, O_RDONLY|O_EXCL, 0); if (fd >= 0) { sra = sysfs_read(fd, 0, GET_VERSION); close(fd); if (sra && sra->array.major_version == -1 && - strcmp(sra->text_version, "ddf-bvd") == 0) { - st->ss = &super_ddf_svd; - return st->ss->validate_geometry(st, level, layout, - raiddisks, chunk, size, - dev, freesize); + strcmp(sra->text_version, "ddf") == 0) { + + /* load super */ + /* find space for 'n' devices. */ + /* remember the devices */ + /* Somehow return the fact that we have enough */ } - fprintf(stderr, - Name ": Cannot create this array on device %s\n", - dev); + if (verbose) + fprintf(stderr, + Name ": ddf: Cannot create this array " + "on device %s - a container is required.\n", + dev); return 0; } if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); + if (verbose) + fprintf(stderr, Name ": ddf: Cannot open %s: %s\n", + dev, strerror(errno)); return 0; } /* Well, it is in use by someone, maybe a 'ddf' container. */ cfd = open_container(fd); if (cfd < 0) { close(fd); - fprintf(stderr, Name ": Cannot use %s: It is busy\n", - dev); + if (verbose) + fprintf(stderr, Name ": ddf: Cannot use %s: %s\n", + dev, strerror(EBUSY)); return 0; } sra = sysfs_read(cfd, 0, GET_VERSION); @@ -2137,26 +2635,28 @@ int validate_geometry_ddf(struct supertype *st, * and try to create a bvd */ struct ddf_super *ddf; - st->ss = &super_ddf_bvd; if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) { st->sb = ddf; st->container_dev = fd2devnum(cfd); close(cfd); - return st->ss->validate_geometry(st, level, layout, + return validate_geometry_ddf_bvd(st, level, layout, raiddisks, chunk, size, - dev, freesize); + dev, freesize, + verbose); } close(cfd); - } - fprintf(stderr, Name ": Cannot use %s: Already in use\n", - dev); + } else /* device may belong to a different container */ + return 0; + return 1; } -int validate_geometry_ddf_container(struct supertype *st, - int level, int layout, int raiddisks, - int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) +static int +validate_geometry_ddf_container(struct supertype *st, + int level, int layout, int raiddisks, + int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose) { int fd; unsigned long long ldsize; @@ -2168,8 +2668,9 @@ int validate_geometry_ddf_container(struct supertype *st, fd = open(dev, O_RDONLY|O_EXCL, 0); if (fd < 0) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dev, strerror(errno)); + if (verbose) + fprintf(stderr, Name ": ddf: Cannot open %s: %s\n", + dev, strerror(errno)); return 0; } if (!get_dev_size(fd, dev, &ldsize)) { @@ -2179,76 +2680,17 @@ int validate_geometry_ddf_container(struct supertype *st, close(fd); *freesize = avail_size_ddf(st, ldsize >> 9); + if (*freesize == 0) + return 0; return 1; } -struct extent { - unsigned long long start, size; -}; -int cmp_extent(const void *av, const void *bv) -{ - const struct extent *a = av; - const struct extent *b = bv; - if (a->start < b->start) - return -1; - if (a->start > b->start) - return 1; - return 0; -} - -struct extent *get_extents(struct ddf_super *ddf, struct dl *dl) -{ - /* find a list of used extents on the give physical device - * (dnum) of the given ddf. - * Return a malloced array of 'struct extent' - -FIXME ignore DDF_Legacy devices? - - */ - struct extent *rv; - int n = 0; - int dnum; - int i, j; - - /* FIXME this is dl->pdnum */ - for (dnum = 0; dnum < ddf->phys->used_pdes; dnum++) - if (memcmp(dl->disk.guid, - ddf->phys->entries[dnum].guid, - DDF_GUID_LEN) == 0) - break; - - if (dnum == ddf->phys->used_pdes) - return NULL; - - rv = malloc(sizeof(struct extent) * (ddf->max_part + 2)); - if (!rv) - return NULL; - - for (i = 0; i < ddf->max_part; i++) { - struct vcl *v = dl->vlist[i]; - if (v == NULL) - continue; - for (j=0; j < v->conf.prim_elmnt_count; j++) - if (v->conf.phys_refnum[j] == dl->disk.refnum) { - /* This device plays role 'j' in 'v'. */ - rv[n].start = __be64_to_cpu(v->lba_offset[j]); - rv[n].size = __be64_to_cpu(v->conf.blocks); - n++; - break; - } - } - qsort(rv, n, sizeof(*rv), cmp_extent); - - rv[n].start = __be64_to_cpu(ddf->phys->entries[dnum].config_size); - rv[n].size = 0; - return rv; -} - -int validate_geometry_ddf_bvd(struct supertype *st, - int level, int layout, int raiddisks, - int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) +static int validate_geometry_ddf_bvd(struct supertype *st, + int level, int layout, int raiddisks, + int chunk, unsigned long long size, + char *dev, unsigned long long *freesize, + int verbose) { struct stat stb; struct ddf_super *ddf = st->sb; @@ -2258,8 +2700,11 @@ int validate_geometry_ddf_bvd(struct supertype *st, struct extent *e; int i; /* ddf/bvd supports lots of things, but not containers */ - if (level == LEVEL_CONTAINER) + if (level == LEVEL_CONTAINER) { + if (verbose) + fprintf(stderr, Name ": DDF cannot create a container within an container\n"); return 0; + } /* We must have the container info already read in. */ if (!ddf) return 0; @@ -2293,9 +2738,11 @@ int validate_geometry_ddf_bvd(struct supertype *st, free(e); } if (dcnt < raiddisks) { - fprintf(stderr, Name ": Not enough devices with space " - "for this array (%d < %d)\n", - dcnt, raiddisks); + if (verbose) + fprintf(stderr, + Name ": ddf: Not enough devices with " + "space for this array (%d < %d)\n", + dcnt, raiddisks); return 0; } return 1; @@ -2311,8 +2758,10 @@ int validate_geometry_ddf_bvd(struct supertype *st, break; } if (!dl) { - fprintf(stderr, Name ": %s is not in the same DDF set\n", - dev); + if (verbose) + fprintf(stderr, Name ": ddf: %s is not in the " + "same DDF set\n", + dev); return 0; } e = get_extents(ddf, dl); @@ -2331,19 +2780,6 @@ int validate_geometry_ddf_bvd(struct supertype *st, return 1; } -int validate_geometry_ddf_svd(struct supertype *st, - int level, int layout, int raiddisks, - int chunk, unsigned long long size, - char *dev, unsigned long long *freesize) -{ - /* dd/svd only supports striped, mirrored, concat, spanned... */ - if (level != LEVEL_LINEAR && - level != 0 && - level != 1) - return 0; - return 1; -} - static int load_super_ddf_all(struct supertype *st, int fd, void **sbp, char *devname, int keep_fd) @@ -2355,8 +2791,14 @@ static int load_super_ddf_all(struct supertype *st, int fd, int seq; char nm[20]; int dfd; + int devnum = fd2devnum(fd); + enum sysfs_read_flags flags; + + flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; + if (mdmon_running(devnum)) + flags |= SKIP_GONE_DEVS; - sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); + sra = sysfs_read(fd, 0, flags); if (!sra) return 1; if (sra->array.major_version != -1 || @@ -2364,8 +2806,7 @@ static int load_super_ddf_all(struct supertype *st, int fd, strcmp(sra->text_version, "ddf") != 0) return 1; - super = malloc(sizeof(*super)); - if (!super) + if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0) return 1; memset(super, 0, sizeof(*super)); @@ -2400,25 +2841,39 @@ static int load_super_ddf_all(struct supertype *st, int fd, close(dfd); /* Now we need the device-local bits */ for (sd = sra->devs ; sd ; sd = sd->next) { + int rv; + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY); if (dfd < 0) return 2; - seq = load_ddf_local(dfd, super, NULL, keep_fd); + rv = load_ddf_headers(dfd, super, NULL); + if (rv == 0) + rv = load_ddf_local(dfd, super, NULL, keep_fd); if (!keep_fd) close(dfd); + if (rv) + return 1; + } + if (st->subarray[0]) { + struct vcl *v; + + for (v = super->conflist; v; v = v->next) + if (v->vcnum == atoi(st->subarray)) + super->currentconf = v; + if (!super->currentconf) + return 1; } *sbp = super; if (st->ss == NULL) { - st->ss = &super_ddf_container; + st->ss = &super_ddf; st->minor_version = 0; st->max_devs = 512; st->container_dev = fd2devnum(fd); } + st->loaded_container = 1; return 0; } -#endif - - +#endif /* MDASSEMBLE */ static struct mdinfo *container_content_ddf(struct supertype *st) { @@ -2437,31 +2892,28 @@ static struct mdinfo *container_content_ddf(struct supertype *st) for (vc = ddf->conflist ; vc ; vc=vc->next) { int i; + int j; struct mdinfo *this; this = malloc(sizeof(*this)); memset(this, 0, sizeof(*this)); this->next = rest; rest = this; - this->array.major_version = 1000; - this->array.minor_version = 0; - this->array.patch_version = 0; this->array.level = map_num1(ddf_level_num, vc->conf.prl); this->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count); this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl, this->array.raid_disks); this->array.md_minor = -1; + this->array.major_version = -1; + this->array.minor_version = -2; this->array.ctime = DECADE + __be32_to_cpu(*(__u32*)(vc->conf.guid+16)); this->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp); this->array.chunk_size = 512 << vc->conf.chunk_shift; - for (i=0; i < __be16_to_cpu(ddf->virt->populated_vdes); i++) - if (memcmp(ddf->virt->entries[i].guid, - vc->conf.guid, DDF_GUID_LEN) == 0) - break; + i = vc->vcnum; if ((ddf->virt->entries[i].state & DDF_state_inconsistent) || (ddf->virt->entries[i].init_state & DDF_initstate_mask) != DDF_init_full) { @@ -2469,21 +2921,27 @@ static struct mdinfo *container_content_ddf(struct supertype *st) this->resync_start = 0; } else { this->array.state = 1; - this->resync_start = ~0ULL; + this->resync_start = MaxSector; } - memcpy(this->name, ddf->virt->entries[i].name, 32); - this->name[33]=0; + memcpy(this->name, ddf->virt->entries[i].name, 16); + this->name[16]=0; + for(j=0; j<16; j++) + if (this->name[j] == ' ') + this->name[j] = 0; memset(this->uuid, 0, sizeof(this->uuid)); this->component_size = __be64_to_cpu(vc->conf.blocks); this->array.size = this->component_size / 2; this->container_member = i; + ddf->currentconf = vc; + uuid_from_super_ddf(st, this->uuid); + ddf->currentconf = NULL; + sprintf(this->text_version, "/%s/%d", devnum2devname(st->container_dev), this->container_member); - for (i=0 ; i < ddf->mppe ; i++) { struct mdinfo *dev; struct dl *d; @@ -2497,7 +2955,8 @@ static struct mdinfo *container_content_ddf(struct supertype *st) if (d->disk.refnum == vc->conf.phys_refnum[i]) break; if (d == NULL) - break; + /* Haven't found that one yet, maybe there are others */ + continue; dev = malloc(sizeof(*dev)); memset(dev, 0, sizeof(*dev)); @@ -2509,9 +2968,10 @@ static struct mdinfo *container_content_ddf(struct supertype *st) dev->disk.minor = d->minor; dev->disk.raid_disk = i; dev->disk.state = (1<recovery_start = MaxSector; - dev->events = __le32_to_cpu(ddf->primary.seq); - dev->data_offset = vc->lba_offset[i]; + dev->events = __be32_to_cpu(ddf->primary.seq); + dev->data_offset = __be64_to_cpu(vc->lba_offset[i]); dev->component_size = __be64_to_cpu(vc->conf.blocks); if (d->devname) strcpy(dev->name, d->devname); @@ -2520,27 +2980,34 @@ static struct mdinfo *container_content_ddf(struct supertype *st) return rest; } -static int init_zero_ddf(struct supertype *st, - mdu_array_info_t *info, - unsigned long long size, char *name, - char *homehost, int *uuid) -{ - st->sb = NULL; - return 0; -} - -static int store_zero_ddf(struct supertype *st, int fd) +static int store_super_ddf(struct supertype *st, int fd) { + struct ddf_super *ddf = st->sb; unsigned long long dsize; - char buf[512]; - memset(buf, 0, 512); + void *buf; + int rc; + if (!ddf) + return 1; + + /* ->dlist and ->conflist will be set for updates, currently not + * supported + */ + if (ddf->dlist || ddf->conflist) + return 1; if (!get_dev_size(fd, NULL, &dsize)) return 1; + if (posix_memalign(&buf, 512, 512) != 0) + return 1; + memset(buf, 0, 512); + lseek64(fd, dsize-512, 0); - write(fd, buf, 512); + rc = write(fd, buf, 512); + free(buf); + if (rc < 0) + return 1; return 0; } @@ -2569,6 +3036,7 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst) return 0; } +#ifndef MDASSEMBLE /* * A new array 'a' has been started which claims to be instance 'inst' * within container 'c'. @@ -2577,7 +3045,7 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst) */ static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst) { - fprintf(stderr, "ddf: open_new %s\n", inst); + dprintf("ddf: open_new %s\n", inst); a->info.container_member = atoi(inst); return 0; } @@ -2591,24 +3059,38 @@ static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst) * For DDF, we need to clear the DDF_state_inconsistent bit in the * !global! virtual_disk.virtual_entry structure. */ -static void ddf_set_array_state(struct active_array *a, int consistent) +static int ddf_set_array_state(struct active_array *a, int consistent) { struct ddf_super *ddf = a->container->sb; int inst = a->info.container_member; + int old = ddf->virt->entries[inst].state; + if (consistent == 2) { + /* Should check if a recovery should be started FIXME */ + consistent = 1; + if (!is_resync_complete(&a->info)) + consistent = 0; + } if (consistent) ddf->virt->entries[inst].state &= ~DDF_state_inconsistent; else ddf->virt->entries[inst].state |= DDF_state_inconsistent; + if (old != ddf->virt->entries[inst].state) + ddf->updates_pending = 1; + + old = ddf->virt->entries[inst].init_state; ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask; - if (a->resync_start == ~0ULL) + if (is_resync_complete(&a->info)) ddf->virt->entries[inst].init_state |= DDF_init_full; - else if (a->resync_start == 0) + else if (a->info.resync_start == 0) ddf->virt->entries[inst].init_state |= DDF_init_not; else ddf->virt->entries[inst].init_state |= DDF_init_quick; + if (old != ddf->virt->entries[inst].init_state) + ddf->updates_pending = 1; - printf("ddf mark %s %llu\n", consistent?"clean":"dirty", - a->resync_start); + dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty", + a->info.resync_start); + return consistent; } /* @@ -2634,7 +3116,7 @@ static void ddf_set_disk(struct active_array *a, int n, int state) int i, st, working; if (vc == NULL) { - fprintf(stderr, "ddf: cannot find instance %d!!\n", inst); + dprintf("ddf: cannot find instance %d!!\n", inst); return; } if (pd < 0) { @@ -2648,15 +3130,18 @@ static void ddf_set_disk(struct active_array *a, int n, int state) /* FIXME */ } } else { + int old = ddf->phys->entries[pd].state; if (state & DS_FAULTY) ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed); if (state & DS_INSYNC) { ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online); ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding); } + if (old != ddf->phys->entries[pd].state) + ddf->updates_pending = 1; } - fprintf(stderr, "ddf: set_disk %d to %x\n", n, state); + dprintf("ddf: set_disk %d to %x\n", n, state); /* Now we need to check the state of the array and update * virtual_disk.entries[n].state. @@ -2699,9 +3184,15 @@ static void ddf_set_disk(struct active_array *a, int n, int state) break; } - ddf->virt->entries[inst].state = - (ddf->virt->entries[inst].state & ~DDF_state_mask) - | state; + if (ddf->virt->entries[inst].state != + ((ddf->virt->entries[inst].state & ~DDF_state_mask) + | state)) { + + ddf->virt->entries[inst].state = + (ddf->virt->entries[inst].state & ~DDF_state_mask) + | state; + ddf->updates_pending = 1; + } } @@ -2715,8 +3206,12 @@ static void ddf_sync_metadata(struct supertype *st) * but ddf is sufficiently weird that it probably always * changes global data .... */ + struct ddf_super *ddf = st->sb; + if (!ddf->updates_pending) + return; + ddf->updates_pending = 0; __write_init_super_ddf(st, 0); - fprintf(stderr, "ddf: sync_metadata\n"); + dprintf("ddf: sync_metadata\n"); } static void ddf_process_update(struct supertype *st, @@ -2759,7 +3254,7 @@ static void ddf_process_update(struct supertype *st, int mppe; int ent; - printf("Process update %x\n", *magic); + dprintf("Process update %x\n", *magic); switch (*magic) { case DDF_PHYS_RECORDS_MAGIC: @@ -2777,6 +3272,21 @@ static void ddf_process_update(struct supertype *st, ddf->phys->entries[ent] = pd->entries[0]; ddf->phys->used_pdes = __cpu_to_be16(1 + __be16_to_cpu(ddf->phys->used_pdes)); + ddf->updates_pending = 1; + if (ddf->add_list) { + struct active_array *a; + struct dl *al = ddf->add_list; + ddf->add_list = al->next; + + al->next = ddf->dlist; + ddf->dlist = al; + + /* As a device has been added, we should check + * for any degraded devices that might make + * use of this spare */ + for (a = st->arrays ; a; a=a->next) + a->check_degraded = 1; + } break; case DDF_VIRT_RECORDS_MAGIC: @@ -2794,19 +3304,20 @@ static void ddf_process_update(struct supertype *st, ddf->virt->entries[ent] = vd->entries[0]; ddf->virt->populated_vdes = __cpu_to_be16(1 + __be16_to_cpu(ddf->virt->populated_vdes)); + ddf->updates_pending = 1; break; case DDF_VD_CONF_MAGIC: - printf("len %d %d\n", update->len, ddf->conf_rec_len); + dprintf("len %d %d\n", update->len, ddf->conf_rec_len); mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries); - if (update->len != ddf->conf_rec_len) + if (update->len != ddf->conf_rec_len * 512) return; vc = (struct vd_config*)update->buf; for (vcl = ddf->conflist; vcl ; vcl = vcl->next) if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0) break; - printf("vcl = %p\n", vcl); + dprintf("vcl = %p\n", vcl); if (vcl) { /* An update, just copy the phys_refnum and lba_offset * fields @@ -2815,10 +3326,12 @@ static void ddf_process_update(struct supertype *st, mppe * (sizeof(__u32) + sizeof(__u64))); } else { /* A new VD_CONF */ + if (!update->space) + return; vcl = update->space; update->space = NULL; vcl->next = ddf->conflist; - vcl->conf = *vc; + memcpy(&vcl->conf, vc, update->len); vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe]; ddf->conflist = vcl; @@ -2831,8 +3344,8 @@ static void ddf_process_update(struct supertype *st, for (dn=0; dn < ddf->mppe ; dn++) if (vcl->conf.phys_refnum[dn] == dl->disk.refnum) { - printf("dev %d has %p at %d\n", - dl->pdnum, vcl, vn); + dprintf("dev %d has %p at %d\n", + dl->pdnum, vcl, vn); dl->vlist[vn++] = vcl; break; } @@ -2858,12 +3371,29 @@ static void ddf_process_update(struct supertype *st, DDF_Active_in_VD); } } + ddf->updates_pending = 1; break; case DDF_SPARE_ASSIGN_MAGIC: default: break; } } +static void ddf_prepare_update(struct supertype *st, + struct metadata_update *update) +{ + /* This update arrived at managemon. + * We are about to pass it to monitor. + * If a malloc is needed, do it here. + */ + struct ddf_super *ddf = st->sb; + __u32 *magic = (__u32*)update->buf; + if (*magic == DDF_VD_CONF_MAGIC) + if (posix_memalign(&update->space, 512, + offsetof(struct vcl, conf) + + ddf->conf_rec_len * 512) != 0) + update->space = NULL; +} + /* * Check if the array 'a' is degraded but not failed. * If it is, find as many spares as are available and needed and @@ -2894,10 +3424,6 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, struct vd_config *vc; __u64 *lba; -/* FIXME, If there is a DS_FAULTY, we want to wait for it to be - * removed. Then only look at DS_REMOVE devices. - * What about !DS_INSYNC - how can that happen? - */ for (d = a->info.devs ; d ; d = d->next) { if ((d->curr_state & DS_FAULTY) && d->state_fd >= 0) @@ -2907,8 +3433,8 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, working ++; } - printf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks, - a->info.array.level); + dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks, + a->info.array.level); if (working == a->info.array.raid_disks) return NULL; /* array not degraded */ switch (a->info.array.level) { @@ -2935,7 +3461,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, for (d = a->info.devs ; d ; d = d->next) if (d->disk.raid_disk == i) break; - printf("found %d: %p %x\n", i, d, d?d->curr_state:0); + dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0); if (d && (d->state_fd >= 0)) continue; @@ -2953,7 +3479,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, for (d2 = a->info.devs ; d2 ; d2 = d2->next) if (d2->disk.major == dl->major && d2->disk.minor == dl->minor) { - printf("%x:%x already in array\n", dl->major, dl->minor); + dprintf("%x:%x already in array\n", dl->major, dl->minor); break; } if (d2) @@ -2981,7 +3507,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, } if ( ! (is_dedicated || (is_global && global_ok))) { - printf("%x:%x not suitable: %d %d\n", dl->major, dl->minor, + dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor, is_dedicated, is_global); continue; } @@ -2990,7 +3516,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, * We need a->info.component_size sectors */ ex = get_extents(ddf, dl); if (!ex) { - printf("cannot get extents\n"); + dprintf("cannot get extents\n"); continue; } j = 0; pos = 0; @@ -3006,27 +3532,30 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, free(ex); if (esize < a->info.component_size) { - printf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor, - esize, a->info.component_size); + dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor, + esize, a->info.component_size); /* No room */ continue; } /* Cool, we have a device with some space at pos */ di = malloc(sizeof(*di)); + if (!di) + continue; memset(di, 0, sizeof(*di)); di->disk.number = i; di->disk.raid_disk = i; di->disk.major = dl->major; di->disk.minor = dl->minor; di->disk.state = 0; + di->recovery_start = 0; di->data_offset = pos; di->component_size = a->info.component_size; di->container_member = dl->pdnum; di->next = rv; rv = di; - printf("%x:%x to be %d at %llu\n", dl->major, dl->minor, - i, pos); + dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, + i, pos); break; } @@ -3045,9 +3574,22 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, * Create a metadata_update record to update the * phys_refnum and lba_offset values */ - mu = malloc(sizeof(*mu) + ddf->conf_rec_len * 512); - mu->buf = (char*)(mu+1); - mu->space = malloc(sizeof(struct vcl)); + mu = malloc(sizeof(*mu)); + if (mu && posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) { + free(mu); + mu = NULL; + } + if (!mu) { + while (rv) { + struct mdinfo *n = rv->next; + + free(rv); + rv = n; + } + return NULL; + } + + mu->buf = malloc(ddf->conf_rec_len * 512); mu->len = ddf->conf_rec_len; mu->next = *updates; vc = find_vdcr(ddf, a->info.container_member); @@ -3063,14 +3605,36 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a, *updates = mu; return rv; } +#endif /* MDASSEMBLE */ + +static int ddf_level_to_layout(int level) +{ + switch(level) { + case 0: + case 1: + return 0; + case 5: + return ALGORITHM_LEFT_SYMMETRIC; + case 6: + return ALGORITHM_ROTATING_N_CONTINUE; + case 10: + return 0x102; + default: + return UnSet; + } +} struct superswitch super_ddf = { #ifndef MDASSEMBLE .examine_super = examine_super_ddf, .brief_examine_super = brief_examine_super_ddf, + .brief_examine_subarrays = brief_examine_subarrays_ddf, + .export_examine_super = export_examine_super_ddf, .detail_super = detail_super_ddf, .brief_detail_super = brief_detail_super_ddf, .validate_geometry = validate_geometry_ddf, + .write_init_super = write_init_super_ddf, + .add_to_super = add_to_super_ddf, #endif .match_home = match_home_ddf, .uuid_from_super= uuid_from_super_ddf, @@ -3082,88 +3646,24 @@ struct superswitch super_ddf = { .compare_super = compare_super_ddf, .load_super = load_super_ddf, - .init_super = init_zero_ddf, - .store_super = store_zero_ddf, + .init_super = init_super_ddf, + .store_super = store_super_ddf, .free_super = free_super_ddf, .match_metadata_desc = match_metadata_desc_ddf, - .getinfo_super_n = getinfo_super_n_container, - + .container_content = container_content_ddf, + .default_layout = ddf_level_to_layout, - .major = 1000, - .swapuuid = 0, .external = 1, +#ifndef MDASSEMBLE /* for mdmon */ .open_new = ddf_open_new, .set_array_state= ddf_set_array_state, .set_disk = ddf_set_disk, .sync_metadata = ddf_sync_metadata, .process_update = ddf_process_update, + .prepare_update = ddf_prepare_update, .activate_spare = ddf_activate_spare, - -}; - -/* Super_ddf_container is set by validate_geometry_ddf when given a - * device that is not part of any array - */ -struct superswitch super_ddf_container = { -#ifndef MDASSEMBLE - .validate_geometry = validate_geometry_ddf_container, - .write_init_super = write_init_super_ddf, #endif - - .load_super = load_super_ddf, - .init_super = init_super_ddf, - .add_to_super = add_to_super_ddf, - .getinfo_super = getinfo_super_ddf, - - .free_super = free_super_ddf, - - .container_content = container_content_ddf, - .getinfo_super_n = getinfo_super_n_container, - - .major = 1000, - .swapuuid = 0, - .external = 1, -}; - -struct superswitch super_ddf_bvd = { -#ifndef MDASSEMBLE -// .detail_super = detail_super_ddf_bvd, -// .brief_detail_super = brief_detail_super_ddf_bvd, - .validate_geometry = validate_geometry_ddf_bvd, - .write_init_super = write_init_super_ddf, -#endif - .update_super = update_super_ddf, - .init_super = init_super_ddf_bvd, - .add_to_super = add_to_super_ddf_bvd, - .getinfo_super = getinfo_super_ddf_bvd, - .getinfo_super_n = getinfo_super_n_bvd, - - .load_super = load_super_ddf, - .free_super = free_super_ddf, - .match_metadata_desc = match_metadata_desc_ddf_bvd, - - - .major = 1001, - .swapuuid = 0, - .external = 2, -}; - -struct superswitch super_ddf_svd = { -#ifndef MDASSEMBLE -// .detail_super = detail_super_ddf_svd, -// .brief_detail_super = brief_detail_super_ddf_svd, - .validate_geometry = validate_geometry_ddf_svd, -#endif - .update_super = update_super_ddf, - .init_super = init_super_ddf, - - .load_super = load_super_ddf, - .free_super = free_super_ddf, - .match_metadata_desc = match_metadata_desc_ddf_svd, - - .major = 1002, - .swapuuid = 0, - .external = 2, + .name = "ddf", };