#define HAVE_STDINT_H 1
#include "mdadm.h"
+#include "mdmon.h"
#include "sha1.h"
#include <values.h>
#define DDF_state_deleted 0x2
#define DDF_state_missing 0x3
#define DDF_state_failed 0x4
+#define DDF_state_part_optimal 0x5
#define DDF_state_morphing 0x8
#define DDF_state_inconsistent 0x10
/* virtual_entry.init_state is a bigendian bitmap */
#define DDF_initstate_mask 0x03
#define DDF_init_not 0x00
-#define DDF_init_quick 0x01
+#define DDF_init_quick 0x01 /* initialisation is in progress.
+ * i.e. 'state_inconsistent' */
#define DDF_init_full 0x02
#define DDF_access_mask 0xc0
__u8 sec_elmnt_count;
__u8 sec_elmnt_seq;
__u8 srl;
- __u64 blocks;
- __u64 array_blocks;
+ __u64 blocks; /* blocks per component could be different
+ * on different component devices...(only
+ * for concat I hope) */
+ __u64 array_blocks; /* blocks in array */
__u8 pad1[8];
__u32 spare_refs[8];
__u8 cache_pol[8];
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
-struct superswitch super_ddf_container, super_ddf_bvd;
+extern struct superswitch super_ddf_container, super_ddf_bvd, super_ddf;
static int calc_crc(void *buf, int len)
{
!super->virt) {
free(super->phys);
free(super->virt);
+ super->phys = NULL;
+ super->virt = NULL;
return 2;
}
super->conflist = NULL;
char *conf;
int i;
int conflen;
+ int mppe;
/* First the local disk info */
super->max_part = __be16_to_cpu(super->active->max_partitions);
super->active->data_section_length,
0);
dl->devname = devname ? strdup(devname) : NULL;
+
fstat(fd, &stb);
dl->major = major(stb.st_rdev);
dl->minor = minor(stb.st_rdev);
super->conflist = vcl;
}
memcpy(&vcl->conf, vd, conflen*512);
+ mppe = __be16_to_cpu(super->anchor.max_primary_element_entries);
vcl->lba_offset = (__u64*)
- &vcl->conf.phys_refnum[super->max_part+1];
+ &vcl->conf.phys_refnum[mppe];
dl->vlist[i/conflen] = vcl;
}
free(conf);
int rv;
#ifndef MDASSEMBLE
- if (load_super_ddf_all(st, fd, &st->sb, devname, 0) == 0)
+ if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
return 0;
#endif
sizeof(*super));
return 1;
}
+ memset(super, 0, sizeof(*super));
rv = load_ddf_headers(fd, super, devname);
if (rv) {
printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
printf(" Version : %.8s\n", sb->anchor.revision);
- printf("Controller GUID : "); print_guid(sb->anchor.guid, 1);
+ printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
+ printf("\n");
+ printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
printf("\n");
printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
ddf->controller.vendor_data[len] == 0);
}
-static struct vd_config *find_vdcr(struct ddf_super *ddf)
+/* Return the config record on ddf->conflist whose GUID equals the GUID
+ * of virtual-disk entry 'inst'.
+ * Returns NULL when inst is negative, when inst is greater than
+ * populated_vdes, or when no config record on the list matches.
+ * NOTE(review): inst == populated_vdes passes the range check below;
+ * confirm that entries[] may legitimately be indexed there.
+ */
+static struct vd_config *find_vdcr(struct ddf_super *ddf, int inst)
{
- /* FIXME this just picks off the first one */
- return &ddf->conflist->conf;
+ struct vcl *v;
+ if (inst < 0 || inst > __be16_to_cpu(ddf->virt->populated_vdes))
+ return NULL;
+ /* GUIDs are compared as raw bytes over DDF_GUID_LEN */
+ for (v = ddf->conflist; v; v = v->next)
+ if (memcmp(v->conf.guid,
+ ddf->virt->entries[inst].guid,
+ DDF_GUID_LEN) == 0)
+ return &v->conf;
+ return NULL;
+}
+
+static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
+{
+	/* Scan the physical-disk table for the entry whose refnum equals
+	 * phys_refnum.  The comparison is on the stored value, so the
+	 * caller passes the refnum exactly as it appears in the metadata.
+	 * Returns the index of the first matching entry, or -1 if none.
+	 */
+	int limit = __be16_to_cpu(ddf->phys->max_pdes);
+	int idx = 0;
+
+	while (idx < limit) {
+		if (ddf->phys->entries[idx].refnum == phys_refnum)
+			return idx;
+		idx++;
+	}
+	return -1;
}
static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
* The first 16 bytes of the sha1 of these is used.
*/
struct ddf_super *ddf = st->sb;
- struct vd_config *vd = find_vdcr(ddf);
+ struct vd_config *vd = find_vdcr(ddf, st->container_member);
if (!vd)
memset(uuid, 0, sizeof (uuid));
// info->name[] ?? ;
}
+static void getinfo_super_n_container(struct supertype *st, struct mdinfo *info)
+{
+	/* Only data_offset and component_size are filled in.
+	 * info->disk.number selects the physical-disk entry; its
+	 * config_size (stored big-endian) becomes the data offset, and
+	 * the component size is a fixed 32MiB expressed in 512-byte
+	 * sectors.
+	 */
+	struct ddf_super *ddf = st->sb;
+	int slot = info->disk.number;
+
+	info->component_size = 32*1024*1024 / 512;
+	info->data_offset = __be64_to_cpu(ddf->phys->entries[slot].config_size);
+}
+
+static int rlq_to_layout(int rlq, int prl, int raiddisks);
+
static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
{
struct ddf_super *ddf = st->sb;
- struct vd_config *vd = find_vdcr(ddf);
+ struct vd_config *vd = find_vdcr(ddf, info->container_member);
/* FIXME this returns BVD info - what if we want SVD ?? */
info->array.patch_version = 0;
info->array.raid_disks = __be16_to_cpu(vd->prim_elmnt_count);
info->array.level = map_num1(ddf_level_num, vd->prl);
- info->array.layout = vd->rlq; /* FIXME should this be mapped */
+ info->array.layout = rlq_to_layout(vd->rlq, vd->prl,
+ info->array.raid_disks);
info->array.md_minor = -1;
info->array.ctime = DECADE + __be32_to_cpu(*(__u32*)(vd->guid+16));
info->array.utime = DECADE + __be32_to_cpu(vd->timestamp);
// info->name[] ?? ;
}
+static void getinfo_super_n_bvd(struct supertype *st, struct mdinfo *info)
+{
+	/* Report data_offset and component_size for the member device
+	 * in slot info->disk.raid_disk, taken from the config record
+	 * held in ddf->newconf.  Both on-disk values are big-endian.
+	 */
+	struct ddf_super *ddf = st->sb;
+	int slot = info->disk.raid_disk;
+
+	info->component_size = __be64_to_cpu(ddf->newconf->conf.blocks);
+	info->data_offset = __be64_to_cpu(ddf->newconf->lba_offset[slot]);
+}
+
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
*/
int rv = 0;
// struct ddf_super *ddf = st->sb;
-// struct vd_config *vd = find_vdcr(ddf);
+// struct vd_config *vd = find_vdcr(ddf, info->container_member);
// struct virtual_entry *ve = find_ve(ddf);
return -1;
}
+static int rlq_to_layout(int rlq, int prl, int raiddisks)
+{
+	/* Translate a DDF primary RAID level (prl) plus RAID level
+	 * qualifier (rlq) into the layout number used for the array.
+	 * Returns -1 for any combination that is not handled.
+	 * raiddisks is accepted but not consulted.
+	 */
+	if (prl == DDF_RAID0 || prl == DDF_RAID1)
+		/* hopefully rlq is the SIMPLE variant (or, for RAID1,
+		 * SIMPLE or MULTI depending on raiddisks) */
+		return 0;
+
+	if (prl == DDF_RAID4)
+		/* only DDF_RAID4_N is supported.
+		 * FIXME the -1 isn't checked by callers */
+		return rlq == DDF_RAID4_N ? 0 : -1;
+
+	if (prl == DDF_RAID5 || prl == DDF_RAID6) {
+		if (rlq == DDF_RAID5_N_RESTART)
+			return ALGORITHM_LEFT_ASYMMETRIC;
+		if (rlq == DDF_RAID5_0_RESTART)
+			return ALGORITHM_RIGHT_ASYMMETRIC;
+		if (rlq == DDF_RAID5_N_CONTINUE)
+			return ALGORITHM_LEFT_SYMMETRIC;
+		return -1;
+	}
+
+	return -1;
+}
+
static int init_super_ddf_bvd(struct supertype *st,
mdu_array_info_t *info,
unsigned long long size,
return 0;
}
ve = &ddf->virt->entries[venum];
+ st->container_member = venum;
/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
* timestamp, random number
ve->pad0 = 0xFFFF;
ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
ve->type = 0;
- ve->state = 0;
- ve->init_state = 0;
- if (!(info->state & 1))
- ve->init_state = DDF_state_inconsistent;
+ ve->state = DDF_state_degraded; /* Will be modified as devices are added */
+ if (info->state & 1) /* clean */
+ ve->init_state = DDF_init_full;
+ else
+ ve->init_state = DDF_init_not;
+
memset(ve->pad1, 0xff, 14);
memset(ve->name, ' ', 16);
if (name)
/* Now create a new vd_config */
conflen = __be16_to_cpu(ddf->active->config_record_len);
vcl = malloc(offsetof(struct vcl, conf) + conflen * 512);
- vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->max_part+1];
+ mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+ vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe];
vc = &vcl->conf;
memset(vc->v2, 0xff, 16);
memset(vc->v3, 0xff, 16);
memset(vc->vendor, 0xff, 32);
- mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+
memset(vc->phys_refnum, 0xff, 4*mppe);
memset(vc->phys_refnum+mppe, 0x00, 8*mppe);
struct vd_config *vc;
__u64 *lba_offset;
int mppe;
+ int working;
for (dl = ddf->dlist; dl ; dl = dl->next)
if (dl->major == dk->major &&
lba_offset = (__u64*)(vc->phys_refnum + mppe);
lba_offset[dk->raid_disk] = 0; /* FIXME */
- dl->vlist[0] =ddf->newconf; /* FIXME */
+ dl->vlist[0] = ddf->newconf; /* FIXME */
dl->fd = fd;
dl->devname = devname;
+
+ /* Check how many working raid_disks, and if we can mark
+ * array as optimal yet
+ */
+ working = 0;
+#if 0
+ for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
+ if (vc->phys_refnum[i] != 0xffffffff)
+ working++;
+ if (working == __be16_to_cpu(vc->prim_elmnt_count))
+ ->entries[xx].state = (->entries[xx].state & ~DDF_state_mask)
+ | DDF_state_optimal;
+
+ if (vc->prl == DDF_RAID6 &&
+ working+1 == __be16_to_cpu(vc->prim_elmnt_count))
+ ->entries[xx].state = (->entries[xx].state & ~DDF_state_mask)
+ | DDF_state_part_optimal;
+#endif
}
/* add a device to a container, either while creating it or while
*/
#ifndef MDASSEMBLE
-static int write_init_super_ddf(struct supertype *st)
+static int __write_init_super_ddf(struct supertype *st, int do_close)
{
struct ddf_super *ddf = st->sb;
lseek64(fd, (size-1)*512, SEEK_SET);
write(fd, &ddf->anchor, 512);
- close(fd);
+ if (do_close) {
+ close(fd);
+ d->fd = -1;
+ }
}
return 1;
}
+
+/* Wrapper around __write_init_super_ddf() with do_close set: after the
+ * anchor has been written the device fd is closed and d->fd is reset
+ * to -1.
+ */
+static int write_init_super_ddf(struct supertype *st)
+{
+ return __write_init_super_ddf(st, 1);
+}
+
#endif
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
st->ss = &super_ddf_bvd;
if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
st->sb = ddf;
+ st->container_dev = fd2devnum(cfd);
+ st->container_member = 27; // FIXME
close(cfd);
return st->ss->validate_geometry(st, level, layout,
raiddisks, chunk, size,
super = malloc(sizeof(*super));
if (!super)
return 1;
+ memset(super, 0, sizeof(*super));
/* first, try each device, and choose the best ddf */
for (sd = sra->devs ; sd ; sd = sd->next) {
int rv;
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
- if (!dfd)
+ dfd = dev_open(nm, O_RDONLY);
+ if (dfd < 0)
return 2;
rv = load_ddf_headers(dfd, super, NULL);
- if (!keep_fd) close(dfd);
+ close(dfd);
if (rv == 0) {
seq = __be32_to_cpu(super->active->seq);
if (super->active->openflag)
/* OK, load this ddf */
sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
dfd = dev_open(nm, O_RDONLY);
- if (!dfd)
+ if (dfd < 0)
return 1;
load_ddf_headers(dfd, super, NULL);
load_ddf_global(dfd, super, NULL);
for (sd = sra->devs ; sd ; sd = sd->next) {
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
- if (!dfd)
+ if (dfd < 0)
return 2;
seq = load_ddf_local(dfd, super, NULL, keep_fd);
if (!keep_fd) close(dfd);
}
*sbp = super;
if (st->ss == NULL) {
- st->ss = &super_ddf;
+ st->ss = &super_ddf_container;
st->minor_version = 0;
st->max_devs = 512;
+ st->container_dev = fd2devnum(fd);
}
return 0;
}
+static struct mdinfo *container_content_ddf(struct supertype *st)
+{
+	/* Given a container loaded by load_super_ddf_all,
+	 * extract information about all the arrays into
+	 * an mdinfo tree.
+	 *
+	 * For each vcl in conflist: create an mdinfo, fill it in,
+	 * then look for matching devices (phys_refnum) in dlist
+	 * and create appropriate device mdinfo.
+	 *
+	 * Returns the head of a list linked through ->next, one
+	 * mdinfo per config record, each carrying its member
+	 * devices on ->devs.  NULL is returned if nothing was built.
+	 *
+	 * A config record with no matching virtual-disk entry is
+	 * skipped: without the entry there is no state, name or
+	 * container_member to report.
+	 * If memory runs out, whatever has been built so far is
+	 * returned.
+	 */
+	struct ddf_super *ddf = st->sb;
+	struct mdinfo *rest = NULL;
+	struct vcl *vc;
+	int mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+	int nvd = __be16_to_cpu(ddf->virt->populated_vdes);
+
+	for (vc = ddf->conflist ; vc ; vc = vc->next) {
+		struct mdinfo *this;
+		int venum;
+		int i;
+
+		/* Find the virtual-disk entry before allocating anything,
+		 * so entries[] is never indexed past the populated range.
+		 */
+		for (venum = 0; venum < nvd; venum++)
+			if (memcmp(ddf->virt->entries[venum].guid,
+				   vc->conf.guid, DDF_GUID_LEN) == 0)
+				break;
+		if (venum >= nvd)
+			continue;
+
+		this = malloc(sizeof(*this));
+		if (!this)
+			return rest;
+		memset(this, 0, sizeof(*this));
+		this->next = rest;
+		rest = this;
+
+		this->array.major_version = 1000;
+		this->array.minor_version = 0;
+		this->array.patch_version = 0;
+		this->array.level = map_num1(ddf_level_num, vc->conf.prl);
+		this->array.raid_disks =
+			__be16_to_cpu(vc->conf.prim_elmnt_count);
+		/* Map the DDF qualifier to an md layout, as
+		 * getinfo_super_ddf_bvd does. */
+		this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+						   this->array.raid_disks);
+		this->array.md_minor = -1;
+		this->array.ctime = DECADE +
+			__be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+		this->array.utime = DECADE +
+			__be32_to_cpu(vc->conf.timestamp);
+		this->array.chunk_size = 512 << vc->conf.chunk_shift;
+
+		if ((ddf->virt->entries[venum].state & DDF_state_inconsistent) ||
+		    (ddf->virt->entries[venum].init_state & DDF_initstate_mask) !=
+		    DDF_init_full) {
+			/* dirty, or initialisation not complete */
+			this->array.state = 0;
+			this->resync_start = 0;
+		} else {
+			this->array.state = 1;
+			this->resync_start = ~0ULL;
+		}
+		/* The virtual-entry name is a 16 byte field (it is
+		 * space-filled with 16 bytes when created), so copy
+		 * exactly that much and terminate right after it. */
+		memcpy(this->name, ddf->virt->entries[venum].name, 16);
+		this->name[16] = 0;
+
+		memset(this->uuid, 0, sizeof(this->uuid));
+		this->component_size = __be64_to_cpu(vc->conf.blocks);
+		this->array.size = this->component_size / 2;
+		this->container_member = venum;
+
+		for (i = 0 ; i < mppe ; i++) {
+			struct mdinfo *dev;
+			struct dl *d;
+
+			/* all-ones marks an unused slot */
+			if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
+				continue;
+
+			for (d = ddf->dlist; d ; d = d->next)
+				if (d->disk.refnum == vc->conf.phys_refnum[i])
+					break;
+			if (d == NULL)
+				/* This member was not loaded; later
+				 * slots may still be present. */
+				continue;
+
+			dev = malloc(sizeof(*dev));
+			if (!dev)
+				return rest;
+			memset(dev, 0, sizeof(*dev));
+			dev->next = this->devs;
+			this->devs = dev;
+
+			/* only devices actually found count as working */
+			this->array.working_disks++;
+
+			dev->disk.number = __be32_to_cpu(d->disk.refnum);
+			dev->disk.major = d->major;
+			dev->disk.minor = d->minor;
+			dev->disk.raid_disk = i;
+			dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+
+			/* DDF metadata is big-endian throughout */
+			dev->events = __be32_to_cpu(ddf->primary.seq);
+			dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
+			dev->component_size = __be64_to_cpu(vc->conf.blocks);
+			if (d->devname)
+				/* bounded copy: devname can be an
+				 * arbitrarily long path */
+				snprintf(dev->name, sizeof(dev->name),
+					 "%s", d->devname);
+		}
+	}
+	return rest;
+}
+
static int init_zero_ddf(struct supertype *st,
mdu_array_info_t *info,
unsigned long long size, char *name,
return 0;
}
+/*
+ * A new array 'a' has been started which claims to be instance 'inst'
+ * within container 'c'.
+ * We need to confirm that the array matches the metadata in 'c' so
+ * that we don't corrupt any metadata.
+ */
+static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
+{
+ /* No checking against the container metadata is done here yet:
+  * the instance number is logged to stderr and 0 is returned.
+  */
+ fprintf(stderr, "ddf: open_new %d\n", inst);
+ return 0;
+}
+
+/*
+ * The array 'a' is to be marked clean in the metadata.
+ * If '->resync_start' is not ~(unsigned long long)0, then the array is only
+ * clean up to that point (in sectors). If that cannot be recorded in the
+ * metadata, then leave it as dirty.
+ *
+ * For DDF, we need to clear the DDF_state_inconsistent bit in the
+ * !global! virtual_disk.virtual_entry structure.
+ */
+static void ddf_set_array_state(struct active_array *a, int consistent)
+{
+	/* Record the array's clean/dirty status and initialisation
+	 * progress in its virtual-disk entry (indexed by the array's
+	 * container_member):
+	 *  - 'consistent' clears, otherwise sets, DDF_state_inconsistent;
+	 *  - init_state becomes 'full' when resync_start is ~0, 'not'
+	 *    when it is 0, and 'quick' for anything in between.
+	 * The change is only made in memory here.
+	 */
+	struct ddf_super *ddf = a->container->sb;
+	int inst = a->info.container_member;
+	int init_flag;
+
+	if (!consistent)
+		ddf->virt->entries[inst].state |= DDF_state_inconsistent;
+	else
+		ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
+
+	if (a->resync_start == 0)
+		init_flag = DDF_init_not;
+	else if (a->resync_start == ~0ULL)
+		init_flag = DDF_init_full;
+	else
+		init_flag = DDF_init_quick;
+
+	ddf->virt->entries[inst].init_state =
+		(ddf->virt->entries[inst].init_state & ~DDF_initstate_mask)
+		| init_flag;
+
+	printf("ddf mark %s %llu\n", consistent?"clean":"dirty",
+	       a->resync_start);
+}
+
+/*
+ * The state of each disk is stored in the global phys_disk structure
+ * in phys_disk.entries[n].state.
+ * This makes various combinations awkward.
+ * - When a device fails in any array, it must be failed in all arrays
+ * that include a part of this device.
+ * - When a component is rebuilding, we cannot include it officially in the
+ * array unless this is the only array that uses the device.
+ *
+ * So: when transitioning:
+ * Online -> failed, just set failed flag. monitor will propagate
+ * spare -> online, the device might need to be added to the array.
+ * spare -> failed, just set failed. Don't worry if in array or not.
+ */
+static void ddf_set_disk(struct active_array *a, int n, int state)
+{
+	/* Record a state change for the device in slot 'n' of array 'a',
+	 * then recompute the state of the whole virtual disk.
+	 *
+	 * 'state' is a mask of DS_* flags; DS_FAULTY and DS_INSYNC are
+	 * the ones acted on.  The per-disk flags live in the global
+	 * phys_disk table and are stored big-endian.
+	 * Nothing is written to disk here; only the in-memory metadata
+	 * is updated.
+	 */
+	struct ddf_super *ddf = a->container->sb;
+	int inst = a->info.container_member;
+	struct vd_config *vc = find_vdcr(ddf, inst);
+	int pd;
+	int i, st, working;
+	int vstate;
+
+	/* vc must be validated before any slot lookup dereferences it */
+	if (vc == NULL) {
+		fprintf(stderr, "ddf: cannot find instance %d!!\n", inst);
+		return;
+	}
+
+	pd = find_phys(ddf, vc->phys_refnum[n]);
+	if (pd < 0) {
+		/* disk doesn't currently exist. If it is now in_sync,
+		 * insert it. */
+		if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
+			/* Find dev 'n' in a->info->devs, determine the
+			 * ddf refnum, and set vc->phys_refnum and update
+			 * phys->entries[]
+			 */
+			/* FIXME */
+		}
+	} else {
+		if (state & DS_FAULTY)
+			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
+		if (state & DS_INSYNC) {
+			ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
+			ddf->phys->entries[pd].state &= ~__cpu_to_be16(DDF_Rebuilding);
+		}
+	}
+
+	/* Now we need to check the state of the array and update
+	 * virtual_disk.entries[n].state.
+	 * It needs to be one of "optimal", "degraded", "failed".
+	 * I don't understand 'deleted' or 'missing'.
+	 */
+	working = 0;
+	for (i = 0; i < a->info.array.raid_disks; i++) {
+		pd = find_phys(ddf, vc->phys_refnum[i]);
+		if (pd < 0)
+			continue;
+		/* Test the flags of the disk just looked up, converted
+		 * to host order, not the caller's 'state' argument. */
+		st = __be16_to_cpu(ddf->phys->entries[pd].state);
+		if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+		    == DDF_Online)
+			working++;
+	}
+
+	vstate = DDF_state_degraded;
+	if (working == a->info.array.raid_disks)
+		vstate = DDF_state_optimal;
+	else switch(vc->prl) {
+	case DDF_RAID0:
+	case DDF_CONCAT:
+	case DDF_JBOD:
+		/* no redundancy: any missing member is fatal */
+		vstate = DDF_state_failed;
+		break;
+	case DDF_RAID1:
+		if (working == 0)
+			vstate = DDF_state_failed;
+		break;
+	case DDF_RAID4:
+	case DDF_RAID5:
+		if (working < a->info.array.raid_disks-1)
+			vstate = DDF_state_failed;
+		break;
+	case DDF_RAID6:
+		if (working < a->info.array.raid_disks-2)
+			vstate = DDF_state_failed;
+		else if (working == a->info.array.raid_disks-1)
+			vstate = DDF_state_part_optimal;
+		break;
+	default:
+		/* other levels stay 'degraded' */
+		break;
+	}
+
+	ddf->virt->entries[inst].state =
+		(ddf->virt->entries[inst].state & ~DDF_state_mask)
+		| vstate;
+
+	fprintf(stderr, "ddf: set_disk %d\n", n);
+}
+
+static void ddf_sync_metadata(struct active_array *a)
+{
+
+ /*
+ * Write all data to all devices.
+ * Later, we might be able to track whether only local changes
+ * have been made, or whether any global data has been changed,
+ * but ddf is sufficiently weird that it probably always
+ * changes global data ....
+ */
+ /* do_close is 0, so the device fds are not closed by the write.
+  * The return value of the write is not checked. */
+ __write_init_super_ddf(a->container, 0);
+ fprintf(stderr, "ddf: sync_metadata\n");
+}
+
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
.examine_super = examine_super_ddf,
.store_super = store_zero_ddf,
.free_super = free_super_ddf,
.match_metadata_desc = match_metadata_desc_ddf,
+ .getinfo_super_n = getinfo_super_n_container,
.major = 1000,
.swapuuid = 0,
.external = 1,
.text_version = "ddf",
+
+/* for mdmon */
+ .open_new = ddf_open_new,
+ .set_array_state= ddf_set_array_state,
+ .set_disk = ddf_set_disk,
+ .sync_metadata = ddf_sync_metadata,
+
+
};
/* Super_ddf_container is set by validate_geometry_ddf when given a
.write_init_super = write_init_super_ddf,
#endif
+ .load_super = load_super_ddf,
.init_super = init_super_ddf,
.add_to_super = add_to_super_ddf,
+ .getinfo_super = getinfo_super_ddf,
.free_super = free_super_ddf,
+ .container_content = container_content_ddf,
+ .getinfo_super_n = getinfo_super_n_container,
+
.major = 1000,
.swapuuid = 0,
.external = 1,
.init_super = init_super_ddf_bvd,
.add_to_super = add_to_super_ddf_bvd,
.getinfo_super = getinfo_super_ddf_bvd,
+ .getinfo_super_n = getinfo_super_n_bvd,
.load_super = load_super_ddf,
.free_super = free_super_ddf,