#define DDF_REVISION "01.00.00"
struct ddf_header {
- __u32 magic;
+ __u32 magic; /* DDF_HEADER_MAGIC */
__u32 crc;
char guid[DDF_GUID_LEN];
char revision[8]; /* 01.00.00 */
/* The content of the 'controller section' - global scope */
struct ddf_controller_data {
- __u32 magic;
+ __u32 magic; /* DDF_CONTROLLER_MAGIC */
__u32 crc;
char guid[DDF_GUID_LEN];
struct controller_type {
/* The content of phys_section - global scope */
struct phys_disk {
- __u32 magic;
+ __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
__u32 crc;
__u16 used_pdes;
__u16 max_pdes;
/* phys_disk_entry.type is a bitmap - bigendian remember */
#define DDF_Forced_PD_GUID 1
#define DDF_Active_in_VD 2
-#define DDF_Global_Spare 4
+#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
#define DDF_Spare 8 /* overrides Global_spare */
#define DDF_Foreign 16
#define DDF_Legacy 32 /* no DDF on this device */
/* The content of the virt_section global scope */
struct virtual_disk {
- __u32 magic;
+ __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
__u32 crc;
__u16 populated_vdes;
__u16 max_vdes;
*/
struct vd_config {
- __u32 magic;
+ __u32 magic; /* DDF_VD_CONF_MAGIC */
__u32 crc;
char guid[DDF_GUID_LEN];
__u32 timestamp;
#define DDF_cache_rallowed 64 /* enable read caching */
struct spare_assign {
- __u32 magic;
+ __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
__u32 crc;
__u32 timestamp;
__u8 reserved[7];
/* The data_section contents - local scope */
struct disk_data {
- __u32 magic;
+ __u32 magic; /* DDF_PHYS_DATA_MAGIC */
__u32 crc;
char guid[DDF_GUID_LEN];
__u32 refnum; /* crc of some magic drive data ... */
struct phys_disk *phys;
struct virtual_disk *virt;
int pdsize, vdsize;
- int max_part;
+ int max_part, mppe, conf_rec_len;
struct vcl {
struct vcl *next;
__u64 *lba_offset; /* location in 'conf' of
* the lba table */
struct vd_config conf;
} *conflist, *newconf;
+ int conf_num; /* Index into 'virt' of entry matching 'newconf' */
struct dl {
struct dl *next;
struct disk_data disk;
int major, minor;
char *devname;
int fd;
- struct vcl *vlist[0]; /* max_part+1 in size */
+ int pdnum; /* index in ->phys */
+ struct spare_assign *spare;
+ struct vcl *vlist[0]; /* max_part in size */
} *dlist;
};
}
super->conflist = NULL;
super->dlist = NULL;
+
+ super->max_part = __be16_to_cpu(super->active->max_partitions);
+ super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
+ super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
return 0;
}
struct stat stb;
char *conf;
int i;
- int conflen;
- int mppe;
+ int vnum;
/* First the local disk info */
- super->max_part = __be16_to_cpu(super->active->max_partitions);
dl = malloc(sizeof(*dl) +
- (super->max_part+1) * sizeof(dl->vlist[0]));
+ (super->max_part) * sizeof(dl->vlist[0]));
load_section(fd, super, &dl->disk,
super->active->data_section_offset,
dl->minor = minor(stb.st_rdev);
dl->next = super->dlist;
dl->fd = keep ? fd : -1;
- for (i=0 ; i < super->max_part + 1 ; i++)
+ dl->spare = NULL;
+ for (i=0 ; i < super->max_part ; i++)
dl->vlist[i] = NULL;
super->dlist = dl;
+ dl->pdnum = 0;
+ for (i=0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
+ if (memcmp(super->phys->entries[i].guid,
+ dl->disk.guid, DDF_GUID_LEN) == 0)
+ dl->pdnum = i;
+
/* Now the config list. */
/* 'conf' is an array of config entries, some of which are
* probably invalid. Those which are good need to be copied into
* the conflist
*/
- conflen = __be16_to_cpu(super->active->config_record_len);
conf = load_section(fd, super, NULL,
super->active->config_section_offset,
super->active->config_section_length,
0);
+ vnum = 0;
for (i = 0;
i < __be32_to_cpu(super->active->config_section_length);
- i += conflen) {
+ i += super->conf_rec_len) {
struct vd_config *vd =
(struct vd_config *)((char*)conf + i*512);
struct vcl *vcl;
+ if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
+ if (dl->spare)
+ continue;
+ dl->spare = malloc(super->conf_rec_len*512);
+ memcpy(dl->spare, vd, super->conf_rec_len*512);
+ continue;
+ }
if (vd->magic != DDF_VD_CONF_MAGIC)
continue;
for (vcl = super->conflist; vcl; vcl = vcl->next) {
}
if (vcl) {
- dl->vlist[i/conflen] = vcl;
+ dl->vlist[vnum++] = vcl;
if (__be32_to_cpu(vd->seqnum) <=
__be32_to_cpu(vcl->conf.seqnum))
continue;
} else {
- vcl = malloc(conflen*512 + offsetof(struct vcl, conf));
+ vcl = malloc(super->conf_rec_len*512 +
+ offsetof(struct vcl, conf));
vcl->next = super->conflist;
super->conflist = vcl;
+ dl->vlist[vnum++] = vcl;
}
- memcpy(&vcl->conf, vd, conflen*512);
- mppe = __be16_to_cpu(super->anchor.max_primary_element_entries);
+ memcpy(&vcl->conf, vd, super->conf_rec_len*512);
vcl->lba_offset = (__u64*)
- &vcl->conf.phys_refnum[mppe];
- dl->vlist[i/conflen] = vcl;
+ &vcl->conf.phys_refnum[super->mppe];
}
free(conf);
ddf->dlist = d->next;
if (d->fd >= 0)
close(d->fd);
+ if (d->spare)
+ free(d->spare);
free(d);
}
free(ddf);
return NULL;
st = malloc(sizeof(*st));
+ memset(st, 0, sizeof(*st));
st->ss = &super_ddf;
st->max_devs = 512;
st->minor_version = 0;
return NULL;
st = malloc(sizeof(*st));
+ memset(st, 0, sizeof(*st));
st->ss = &super_ddf_bvd;
st->max_devs = 512;
st->minor_version = 0;
return NULL;
st = malloc(sizeof(*st));
+ memset(st, 0, sizeof(*st));
st->ss = &super_ddf_svd;
st->max_devs = 512;
st->minor_version = 0;
{ DDF_RAID0, 0 },
{ DDF_RAID1, 1 },
{ DDF_RAID3, LEVEL_UNSUPPORTED },
- { DDF_RAID5, 4 },
+ { DDF_RAID4, 4 },
+ { DDF_RAID5, 5 },
{ DDF_RAID1E, LEVEL_UNSUPPORTED },
{ DDF_JBOD, LEVEL_UNSUPPORTED },
{ DDF_CONCAT, LEVEL_LINEAR },
static void examine_vd(int n, struct ddf_super *sb, char *guid)
{
- int crl = __be16_to_cpu(sb->anchor.config_record_len);
+ int crl = sb->conf_rec_len;
struct vcl *vcl;
for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
* The first 16 bytes of the sha1 of these is used.
*/
struct ddf_super *ddf = st->sb;
- struct vd_config *vd = find_vdcr(ddf, st->container_member);
+ struct vd_config *vd = find_vdcr(ddf, ddf->conf_num);
if (!vd)
memset(uuid, 0, sizeof (uuid));
info->disk.major = 0;
info->disk.minor = 0;
- info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
-// info->disk.raid_disk = find refnum in the table and use index;
- info->disk.raid_disk = -1;
- for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes) ; i++)
- if (ddf->phys->entries[i].refnum == ddf->dlist->disk.refnum) {
- info->disk.raid_disk = i;
- break;
- }
+ if (ddf->dlist) {
+ info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
+ info->disk.raid_disk = -1;
+ for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes) ; i++)
+ if (ddf->phys->entries[i].refnum ==
+ ddf->dlist->disk.refnum) {
+ info->disk.raid_disk = i;
+ break;
+ }
+ } else {
+ info->disk.number = -1;
+// info->disk.raid_disk = find refnum in the table and use index;
+ }
info->disk.state = (1 << MD_DISK_SYNC);
info->reshape_active = 0;
// info->disk.raid_disk = find refnum in the table and use index;
// info->disk.state = ???;
+ info->resync_start = 0;
+ if (!(ddf->virt->entries[info->container_member].state
+ & DDF_state_inconsistent) &&
+ (ddf->virt->entries[info->container_member].init_state
+ & DDF_initstate_mask)
+ == DDF_init_full)
+ info->resync_start = ~0ULL;
+
uuid_from_super_ddf(st, info->uuid);
sprintf(info->text_version, "/%s/%d",
ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
ddf->max_part = 64;
- ddf->anchor.config_record_len = __cpu_to_be16(1 + 256*12/512);
+ ddf->conf_rec_len = 1 + 256 * 12 / 512;
+ ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
ddf->anchor.max_primary_element_entries = __cpu_to_be16(256);
+ ddf->mppe = 256;
memset(ddf->anchor.pad3, 0xff, 54);
/* controller sections is one sector long immediately
* Remaining 16 are serial number.... maybe a hostname would do?
*/
memcpy(ddf->controller.guid, T10, sizeof(T10));
- gethostname(hostname, 17);
- hostname[17] = 0;
+ gethostname(hostname, sizeof(hostname));
+ hostname[sizeof(hostname) - 1] = 0;
hostlen = strlen(hostname);
memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
for (i = strlen(T10) ; i+hostlen < 24; i++)
struct virtual_entry *ve;
struct vcl *vcl;
struct vd_config *vc;
- int mppe;
- int conflen;
if (__be16_to_cpu(ddf->virt->populated_vdes)
>= __be16_to_cpu(ddf->virt->max_vdes)) {
return 0;
}
ve = &ddf->virt->entries[venum];
- st->container_member = venum;
+ ddf->conf_num = venum;
/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
* timestamp, random number
__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
/* Now create a new vd_config */
- conflen = __be16_to_cpu(ddf->active->config_record_len);
- vcl = malloc(offsetof(struct vcl, conf) + conflen * 512);
- mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
- vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[mppe];
+ vcl = malloc(offsetof(struct vcl, conf) + ddf->conf_rec_len * 512);
+ vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
vc = &vcl->conf;
memset(vc->v3, 0xff, 16);
memset(vc->vendor, 0xff, 32);
- memset(vc->phys_refnum, 0xff, 4*mppe);
- memset(vc->phys_refnum+mppe, 0x00, 8*mppe);
+ memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
+ memset(vc->phys_refnum+(ddf->mppe * 4), 0x00, 8*ddf->mppe);
vcl->next = ddf->conflist;
ddf->conflist = vcl;
* We need to find suitable free space in that device and update
* the phys_refnum and lba_offset for the newly created vd_config.
* We might also want to update the type in the phys_disk
- * section. FIXME
+ * section.
*/
struct dl *dl;
struct ddf_super *ddf = st->sb;
struct vd_config *vc;
__u64 *lba_offset;
- int mppe;
int working;
+ int i;
+ int max_virt_disks;
for (dl = ddf->dlist; dl ; dl = dl->next)
if (dl->major == dk->major &&
return;
vc = &ddf->newconf->conf;
+ lba_offset = ddf->newconf->lba_offset;
vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
- mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
- lba_offset = (__u64*)(vc->phys_refnum + mppe);
lba_offset[dk->raid_disk] = 0; /* FIXME */
- dl->vlist[0] = ddf->newconf; /* FIXME */
+ for (i=0; i < ddf->max_part ; i++)
+ if (dl->vlist[i] == NULL)
+ break;
+ if (i == ddf->max_part)
+ return;
+ dl->vlist[i] = ddf->newconf;
dl->fd = fd;
dl->devname = devname;
* array as optimal yet
*/
working = 0;
-#if 0
+
for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
if (vc->phys_refnum[i] != 0xffffffff)
working++;
+ /* Find which virtual_entry */
+ max_virt_disks = __be16_to_cpu(ddf->active->max_vd_entries);
+ for (i=0; i < max_virt_disks ; i++)
+ if (memcmp(ddf->virt->entries[i].guid,
+ vc->guid, DDF_GUID_LEN)==0)
+ break;
+ if (i == max_virt_disks)
+ return;
if (working == __be16_to_cpu(vc->prim_elmnt_count))
- ->entries[xx].state = (->entries[xx].state & ~DDF_state_mask)
+ ddf->virt->entries[i].state =
+ (ddf->virt->entries[i].state & ~DDF_state_mask)
| DDF_state_optimal;
if (vc->prl == DDF_RAID6 &&
working+1 == __be16_to_cpu(vc->prim_elmnt_count))
- ->entries[xx].state = (->entries[xx].state & ~DDF_state_mask)
+ ddf->virt->entries[i].state =
+ (ddf->virt->entries[i].state & ~DDF_state_mask)
| DDF_state_part_optimal;
-#endif
+
+ ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
}
/* add a device to a container, either while creating it or while
* a phys_disk entry and a more detailed disk_data entry.
*/
fstat(fd, &stb);
- dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * (ddf->max_part+1));
+ dd = malloc(sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
dd->major = major(stb.st_rdev);
dd->minor = minor(stb.st_rdev);
dd->devname = devname;
dd->next = ddf->dlist;
dd->fd = fd;
+ dd->spare = NULL;
dd->disk.magic = DDF_PHYS_DATA_MAGIC;
now = time(0);
memset(dd->disk.vendor, ' ', 32);
memcpy(dd->disk.vendor, "Linux", 5);
memset(dd->disk.pad, 0xff, 442);
- for (i = 0; i < ddf->max_part+1 ; i++)
+ for (i = 0; i < ddf->max_part ; i++)
dd->vlist[i] = NULL;
n = __be16_to_cpu(ddf->phys->used_pdes);
pde = &ddf->phys->entries[n];
+ dd->pdnum = n;
+
n++;
ddf->phys->used_pdes = __cpu_to_be16(n);
memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
pde->refnum = dd->disk.refnum;
- pde->type = __cpu_to_be16(DDF_Forced_PD_GUID |DDF_Global_Spare);
+ pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
pde->state = __cpu_to_be16(DDF_Online);
get_dev_size(fd, NULL, &size);
/* We are required to reserve 32Meg, and record the size in sectors */
write(fd, ddf->virt, ddf->vdsize);
/* Now write lots of config records. */
- n_config = __be16_to_cpu(ddf->active->max_partitions);
- conf_size = __be16_to_cpu(ddf->active->config_record_len) * 512;
+ n_config = ddf->max_part;
+ conf_size = ddf->conf_rec_len * 512;
for (i = 0 ; i <= n_config ; i++) {
struct vcl *c = d->vlist[i];
+ if (i == n_config)
+ c = (struct vcl*)d->spare;
if (c) {
c->conf.crc = calc_crc(&c->conf, conf_size);
if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
st->sb = ddf;
st->container_dev = fd2devnum(cfd);
- st->container_member = 27; // FIXME
close(cfd);
return st->ss->validate_geometry(st, level, layout,
raiddisks, chunk, size,
dev, freesize);
}
close(cfd);
- }
- fprintf(stderr, Name ": Cannot use %s: Already in use\n",
- dev);
+ } else /* device may belong to a different container */
+ return 0;
+
return 1;
}
}
close(fd);
- *freesize = avail_size_ddf(st, ldsize);
+ *freesize = avail_size_ddf(st, ldsize >> 9);
return 1;
}
struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
{
/* find a list of used extents on the give physical device
- * (dnum) or the given ddf.
+ * (dnum) of the given ddf.
* Return a malloced array of 'struct extent'
FIXME ignore DDF_Legacy devices?
int dnum;
int i, j;
+ /* FIXME this is dl->pdnum */
for (dnum = 0; dnum < ddf->phys->used_pdes; dnum++)
if (memcmp(dl->disk.guid,
ddf->phys->entries[dnum].guid,
if (!rv)
return NULL;
- for (i = 0; i < ddf->max_part+1; i++) {
+ for (i = 0; i < ddf->max_part; i++) {
struct vcl *v = dl->vlist[i];
if (v == NULL)
continue;
for (dl = ddf->dlist; dl ; dl = dl->next)
{
int found = 0;
+ pos = 0;
i = 0;
e = get_extents(ddf, dl);
for (vc = ddf->conflist ; vc ; vc=vc->next)
{
- int mppe;
int i;
struct mdinfo *this;
this = malloc(sizeof(*this));
this->array.level = map_num1(ddf_level_num, vc->conf.prl);
this->array.raid_disks =
__be16_to_cpu(vc->conf.prim_elmnt_count);
- /* FIXME this should be mapped */
- this->array.layout = vc->conf.rlq;
+ this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+ this->array.raid_disks);
this->array.md_minor = -1;
this->array.ctime = DECADE +
__be32_to_cpu(*(__u32*)(vc->conf.guid+16));
this->array.size = this->component_size / 2;
this->container_member = i;
- mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
- for (i=0 ; i < mppe ; i++) {
+ sprintf(this->text_version, "/%s/%d",
+ devnum2devname(st->container_dev),
+ this->container_member);
+
+
+ for (i=0 ; i < ddf->mppe ; i++) {
struct mdinfo *dev;
struct dl *d;
* We need to confirm that the array matches the metadata in 'c' so
* that we don't corrupt any metadata.
*/
-static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
+static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
{
- fprintf(stderr, "ddf: open_new %d\n", inst);
+/* 'inst' arrives as text; it is logged, then converted with atoi() and
+ * recorded as the container member number of 'a'.  No validation of
+ * the array against the metadata in 'c' is done here: 0 is always returned.
+ */
+ fprintf(stderr, "ddf: open_new %s\n", inst);
+ a->info.container_member = atoi(inst);
return 0;
}
}
}
+ fprintf(stderr, "ddf: set_disk %d to %x\n", n, state);
+
/* Now we need to check the state of the array and update
* virtual_disk.entries[n].state.
* It needs to be one of "optimal", "degraded", "failed".
pd = find_phys(ddf, vc->phys_refnum[i]);
if (pd < 0)
continue;
- st = ddf->phys->entries[pd].state;
- if ((state & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+ st = __be16_to_cpu(ddf->phys->entries[pd].state);
+ if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
== DDF_Online)
working++;
}
(ddf->virt->entries[inst].state & ~DDF_state_mask)
| state;
- fprintf(stderr, "ddf: set_disk %d\n", n);
}
-static void ddf_sync_metadata(struct active_array *a)
+static void ddf_sync_metadata(struct supertype *st)
{
/*
* but ddf is sufficiently weird that it probably always
* changes global data ....
*/
- __write_init_super_ddf(a->container, 0);
+ __write_init_super_ddf(st, 0);
fprintf(stderr, "ddf: sync_metadata\n");
}
+static void ddf_process_update(struct supertype *st,
+			       struct metadata_update *update)
+{
+	/* Apply this update to the in-memory metadata in st->sb.
+	 * The first 4 bytes of update->buf are a DDF_*_MAGIC which
+	 * guides our actions.  An update that fails validation is
+	 * silently ignored.
+	 * Possible updates are:
+	 *   DDF_PHYS_RECORDS_MAGIC
+	 *     Add a new physical device.  Changes to this record
+	 *     only happen implicitly.
+	 *     used_pdes is the device number.
+	 *   DDF_VIRT_RECORDS_MAGIC
+	 *     Add a new VD.  Possibly also change the 'access' bits.
+	 *     populated_vdes is the entry number.
+	 *   DDF_VD_CONF_MAGIC
+	 *     New or updated VD.  The VIRT_RECORD must already
+	 *     exist.  For an update, phys_refnum and lba_offset
+	 *     (at least) are updated, and the VD_CONF must
+	 *     be written to precisely those devices listed with
+	 *     a phys_refnum.
+	 *   DDF_SPARE_ASSIGN_MAGIC
+	 *     replacement Spare Assignment Record... but for which device?
+	 *     (currently ignored)
+	 *
+	 * So, e.g.:
+	 *  - to create a new array, we send a VIRT_RECORD and
+	 *    a VD_CONF.  Then assemble and start the array.
+	 *  - to activate a spare we send a VD_CONF to add the phys_refnum
+	 *    and offset.  This will also mark the spare as active with
+	 *    a spare-assignment record.
+	 */
+	struct ddf_super *ddf = st->sb;
+	__u32 *magic = (__u32*)update->buf;
+	struct phys_disk *pd;
+	struct virtual_disk *vd;
+	struct vd_config *vc;
+	struct vcl *vcl;
+	struct dl *dl;
+	int ent;
+
+	printf("Process update %x\n", *magic);
+
+	switch (*magic) {
+	case DDF_PHYS_RECORDS_MAGIC:
+
+		/* Exactly one header plus one entry is expected */
+		if (update->len != (sizeof(struct phys_disk) +
+				    sizeof(struct phys_disk_entry)))
+			return;
+		pd = (struct phys_disk*)update->buf;
+
+		/* The target slot must be in range and still unused */
+		ent = __be16_to_cpu(pd->used_pdes);
+		if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
+			return;
+		if (!all_ff(ddf->phys->entries[ent].guid))
+			return;
+		ddf->phys->entries[ent] = pd->entries[0];
+		ddf->phys->used_pdes = __cpu_to_be16(1 +
+				__be16_to_cpu(ddf->phys->used_pdes));
+		break;
+
+	case DDF_VIRT_RECORDS_MAGIC:
+
+		/* Exactly one header plus one entry is expected */
+		if (update->len != (sizeof(struct virtual_disk) +
+				    sizeof(struct virtual_entry)))
+			return;
+		vd = (struct virtual_disk*)update->buf;
+
+		/* The target slot must be in range and still unused */
+		ent = __be16_to_cpu(vd->populated_vdes);
+		if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
+			return;
+		if (!all_ff(ddf->virt->entries[ent].guid))
+			return;
+		ddf->virt->entries[ent] = vd->entries[0];
+		ddf->virt->populated_vdes = __cpu_to_be16(1 +
+				__be16_to_cpu(ddf->virt->populated_vdes));
+		break;
+
+	case DDF_VD_CONF_MAGIC:
+		printf("len %d %d\n", update->len, ddf->conf_rec_len);
+
+		/* For this record type 'len' is compared with conf_rec_len
+		 * itself (ddf_activate_spare stores that value in ->len
+		 * while allocating conf_rec_len * 512 bytes for ->buf).
+		 */
+		if (update->len != ddf->conf_rec_len)
+			return;
+		vc = (struct vd_config*)update->buf;
+		for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+			if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+				break;
+		printf("vcl = %p\n", vcl);
+		if (vcl) {
+			/* An update, just copy the phys_refnum and lba_offset
+			 * fields.  The cached ddf->mppe is used so that this
+			 * agrees with the vlist scan below.
+			 */
+			memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
+			       ddf->mppe * (sizeof(__u32) + sizeof(__u64)));
+		} else {
+			/* A new VD_CONF: it is stored in the buffer that was
+			 * pre-allocated in update->space, which we take over.
+			 * Without that buffer there is nowhere to put it.
+			 */
+			if (!update->space)
+				return;
+			vcl = update->space;
+			update->space = NULL;
+			vcl->next = ddf->conflist;
+			/* NOTE(review): this struct assignment copies only
+			 * sizeof(struct vd_config); if the phys_refnum and
+			 * lba tables live beyond that, they are not copied -
+			 * confirm against the struct and the size of 'space'.
+			 */
+			vcl->conf = *vc;
+			vcl->lba_offset = (__u64*)
+				&vcl->conf.phys_refnum[ddf->mppe];
+			ddf->conflist = vcl;
+		}
+		/* Now make sure vlist is correct for each dl. */
+		for (dl = ddf->dlist; dl; dl = dl->next) {
+			int dn;
+			int vn = 0;
+			/* vlist has max_part slots: never record more */
+			for (vcl = ddf->conflist;
+			     vcl && vn < ddf->max_part;
+			     vcl = vcl->next)
+				for (dn=0; dn < ddf->mppe ; dn++)
+					if (vcl->conf.phys_refnum[dn] ==
+					    dl->disk.refnum) {
+						printf("dev %d has %p at %d\n",
+						       dl->pdnum, vcl, vn);
+						dl->vlist[vn++] = vcl;
+						break;
+					}
+			while (vn < ddf->max_part)
+				dl->vlist[vn++] = NULL;
+			/* Keep the phys_disk type bits in step with usage:
+			 * member of a VD, assigned spare, or global spare.
+			 */
+			if (dl->vlist[0]) {
+				ddf->phys->entries[dl->pdnum].type &=
+					~__cpu_to_be16(DDF_Global_Spare);
+				ddf->phys->entries[dl->pdnum].type |=
+					__cpu_to_be16(DDF_Active_in_VD);
+			}
+			if (dl->spare) {
+				ddf->phys->entries[dl->pdnum].type &=
+					~__cpu_to_be16(DDF_Global_Spare);
+				ddf->phys->entries[dl->pdnum].type |=
+					__cpu_to_be16(DDF_Spare);
+			}
+			if (!dl->vlist[0] && !dl->spare) {
+				ddf->phys->entries[dl->pdnum].type |=
+					__cpu_to_be16(DDF_Global_Spare);
+				ddf->phys->entries[dl->pdnum].type &=
+					~__cpu_to_be16(DDF_Spare |
+						       DDF_Active_in_VD);
+			}
+		}
+		break;
+	case DDF_SPARE_ASSIGN_MAGIC:
+	default: break;
+	}
+}
+
+/*
+ * Check if the array 'a' is degraded but not failed.
+ * If it is, find as many spares as are available and needed and
+ * arrange for their inclusion.
+ * We only choose devices which are not already in the array,
+ * and prefer those with a spare-assignment to this array.
+ * otherwise we choose global spares - assuming always that
+ * there is enough room.
+ * For each spare that we assign, we return an 'mdinfo' which
+ * describes the position for the device in the array.
+ * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
+ * the new phys_refnum and lba_offset values.
+ *
+ * Returns NULL when nothing is to be done: the array is not degraded,
+ * has failed, is still waiting for a faulty device to be removed,
+ * no suitable spare exists, or memory could not be allocated.
+ * Otherwise the caller owns the returned list and the update that
+ * was pushed onto '*updates'.
+ *
+ * Only worry about BVDs at the moment.
+ */
+static struct mdinfo *ddf_activate_spare(struct active_array *a,
+					 struct metadata_update **updates)
+{
+	int working = 0;
+	struct mdinfo *d;
+	struct ddf_super *ddf = a->container->sb;
+	int global_ok = 0;
+	struct mdinfo *rv = NULL;
+	struct mdinfo *di;
+	struct metadata_update *mu;
+	struct dl *dl;
+	int i;
+	struct vd_config *vc;
+	__u64 *lba;
+
+/* FIXME, If there is a DS_FAULTY, we want to wait for it to be
+ * removed.  Then only look at DS_REMOVE devices.
+ * What about !DS_INSYNC - how can that happen?
+ */
+	for (d = a->info.devs ; d ; d = d->next) {
+		if ((d->curr_state & DS_FAULTY) &&
+		    d->state_fd >= 0)
+			/* wait for Removal to happen */
+			return NULL;
+		if (d->state_fd >= 0)
+			working ++;
+	}
+
+	printf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
+	       a->info.array.level);
+	if (working == a->info.array.raid_disks)
+		return NULL; /* array not degraded */
+	switch (a->info.array.level) {
+	case 1:
+		if (working == 0)
+			return NULL; /* failed */
+		break;
+	case 4:
+	case 5:
+		if (working < a->info.array.raid_disks - 1)
+			return NULL; /* failed */
+		break;
+	case 6:
+		if (working < a->info.array.raid_disks - 2)
+			return NULL; /* failed */
+		break;
+	default: /* concat or stripe */
+		return NULL; /* failed */
+	}
+
+	/* For each slot, if it is not working, find a spare */
+	dl = ddf->dlist;
+	for (i = 0; i < a->info.array.raid_disks; i++) {
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->disk.raid_disk == i)
+				break;
+		printf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+		if (d && (d->state_fd >= 0))
+			continue;
+
+		/* OK, this device needs recovery.  Find a spare */
+	again:
+		for ( ; dl ; dl = dl->next) {
+			unsigned long long esize;
+			unsigned long long pos;
+			struct mdinfo *d2;
+			int is_global = 0;
+			int is_dedicated = 0;
+			struct extent *ex;
+			int j;
+			/* If in this array, skip */
+			for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+				if (d2->disk.major == dl->major &&
+				    d2->disk.minor == dl->minor) {
+					printf("%x:%x already in array\n", dl->major, dl->minor);
+					break;
+				}
+			if (d2)
+				continue;
+			/* If already chosen for an earlier slot in this
+			 * call, skip: one device cannot fill two slots.
+			 */
+			for (d2 = rv ; d2 ; d2 = d2->next)
+				if (d2->disk.major == dl->major &&
+				    d2->disk.minor == dl->minor)
+					break;
+			if (d2)
+				continue;
+			if (ddf->phys->entries[dl->pdnum].type &
+			    __cpu_to_be16(DDF_Spare)) {
+				/* Check spare assign record */
+				if (dl->spare) {
+					if (dl->spare->type & DDF_spare_dedicated) {
+						/* check spare_ents for guid */
+						for (j = 0 ;
+						     j < __be16_to_cpu(dl->spare->populated);
+						     j++) {
+							if (memcmp(dl->spare->spare_ents[j].guid,
+								   ddf->virt->entries[a->info.container_member].guid,
+								   DDF_GUID_LEN) == 0)
+								is_dedicated = 1;
+						}
+					} else
+						is_global = 1;
+				}
+			} else if (ddf->phys->entries[dl->pdnum].type &
+				   __cpu_to_be16(DDF_Global_Spare)) {
+				is_global = 1;
+			}
+			if ( ! (is_dedicated ||
+				(is_global && global_ok))) {
+				printf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
+				       is_dedicated, is_global);
+				continue;
+			}
+
+			/* We are allowed to use this device - is there space?
+			 * We need a->info.component_size sectors */
+			ex = get_extents(ddf, dl);
+			if (!ex) {
+				printf("cannot get extents\n");
+				continue;
+			}
+			j = 0; pos = 0;
+			esize = 0;
+
+			/* Walk the used extents looking for a big enough gap
+			 * in front of one of them.  The list ends with an
+			 * entry whose size is 0.  Only 'j' may be used here:
+			 * 'i' is the raid slot being filled.
+			 */
+			do {
+				esize = ex[j].start - pos;
+				if (esize >= a->info.component_size)
+					break;
+				pos = ex[j].start + ex[j].size;
+				j++;
+			} while (ex[j-1].size);
+
+			free(ex);
+			if (esize < a->info.component_size) {
+				printf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+				       esize, a->info.component_size);
+				/* No room */
+				continue;
+			}
+
+			/* Cool, we have a device with some space at pos */
+			di = calloc(1, sizeof(*di));
+			if (!di)
+				/* out of memory: settle for what we have */
+				goto assigned;
+			di->disk.number = i;
+			di->disk.raid_disk = i;
+			di->disk.major = dl->major;
+			di->disk.minor = dl->minor;
+			di->disk.state = 0;
+			di->data_offset = pos;
+			di->component_size = a->info.component_size;
+			/* remember which phys_disk entry this spare is */
+			di->container_member = dl->pdnum;
+			di->next = rv;
+			rv = di;
+			printf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+			       i, pos);
+
+			break;
+		}
+		if (!dl && ! global_ok) {
+			/* not enough dedicated spares, try global */
+			global_ok = 1;
+			dl = ddf->dlist;
+			goto again;
+		}
+	}
+
+ assigned:
+	if (!rv)
+		/* No spares found */
+		return rv;
+	/* Now 'rv' has a list of devices to return.
+	 * Create a metadata_update record to update the
+	 * phys_refnum and lba_offset values
+	 */
+	vc = find_vdcr(ddf, a->info.container_member);
+	mu = malloc(sizeof(*mu));
+	if (mu) {
+		mu->buf = malloc(ddf->conf_rec_len * 512);
+		mu->space = malloc(sizeof(struct vcl));
+	}
+	if (!vc || !mu || !mu->buf || !mu->space) {
+		/* Cannot describe the change, so make no change at all */
+		if (mu) {
+			free(mu->buf);
+			free(mu->space);
+			free(mu);
+		}
+		while (rv) {
+			di = rv->next;
+			free(rv);
+			rv = di;
+		}
+		return NULL;
+	}
+	mu->len = ddf->conf_rec_len;
+	mu->next = *updates;
+	memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
+
+	vc = (struct vd_config*)mu->buf;
+	lba = (__u64*)&vc->phys_refnum[ddf->mppe];
+	for (di = rv ; di ; di = di->next) {
+		/* Each spare carries its own phys_disk index; 'dl' is
+		 * stale (or NULL) once the search loop has finished.
+		 */
+		vc->phys_refnum[di->disk.raid_disk] =
+			ddf->phys->entries[di->container_member].refnum;
+		lba[di->disk.raid_disk] = di->data_offset;
+	}
+	*updates = mu;
+	return rv;
+}
+
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
.examine_super = examine_super_ddf,
.set_array_state= ddf_set_array_state,
.set_disk = ddf_set_disk,
.sync_metadata = ddf_sync_metadata,
-
+ .process_update = ddf_process_update,
+ .activate_spare = ddf_activate_spare,
};