free(d->spare);
free(d);
}
+ while (ddf->add_list) {
+ struct dl *d = ddf->add_list;
+ ddf->add_list = d->next;
+ if (d->fd >= 0)
+ close(d->fd);
+ if (d->spare)
+ free(d->spare);
+ free(d);
+ }
free(ddf);
st->sb = NULL;
}
(type&8) ? "spare" : "",
(type&16)? ", foreign" : "",
(type&32)? "pass-through" : "");
+ if (state & DDF_Failed)
+ /* This over-rides these three */
+ state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
printf("/%s%s%s%s%s%s%s",
(state&1)? "Online": "Offline",
(state&2)? ", Failed": "",
{
struct ddf_super *ddf = st->sb;
int map_disks = info->array.raid_disks;
+ __u32 *cptr;
if (ddf->currentconf) {
getinfo_super_ddf_bvd(st, info, map);
return;
}
+ memset(info, 0, sizeof(*info));
info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
info->array.level = LEVEL_CONTAINER;
info->array.layout = 0;
info->array.md_minor = -1;
- info->array.ctime = DECADE + __be32_to_cpu(*(__u32*)
- (ddf->anchor.guid+16));
+ cptr = (__u32 *)(ddf->anchor.guid + 16);
+ info->array.ctime = DECADE + __be32_to_cpu(*cptr);
+
info->array.utime = 0;
info->array.chunk_size = 0;
info->container_enough = 1;
info->recovery_start = MaxSector;
info->reshape_active = 0;
+ info->recovery_blocked = 0;
info->name[0] = 0;
info->array.major_version = -1;
int j;
struct dl *dl;
int map_disks = info->array.raid_disks;
+ __u32 *cptr;
+ memset(info, 0, sizeof(*info));
/* FIXME this returns BVD info - what if we want SVD ?? */
info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
info->array.raid_disks);
info->array.md_minor = -1;
- info->array.ctime = DECADE +
- __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+ cptr = (__u32 *)(vc->conf.guid + 16);
+ info->array.ctime = DECADE + __be32_to_cpu(*cptr);
info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
info->array.chunk_size = 512 << vc->conf.chunk_shift;
info->custom_array_size = 0;
}
for (dl = ddf->dlist; dl ; dl = dl->next)
- if (dl->raiddisk == info->disk.raid_disk)
+ if (dl->raiddisk == ddf->currentdev)
break;
+
info->disk.major = 0;
info->disk.minor = 0;
+ info->disk.state = 0;
if (dl) {
info->disk.major = dl->major;
info->disk.minor = dl->minor;
+ info->disk.raid_disk = dl->raiddisk;
+ info->disk.number = dl->pdnum;
+ info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
}
-// info->disk.number = __be32_to_cpu(ddf->disk.refnum);
-// info->disk.raid_disk = find refnum in the table and use index;
-// info->disk.state = ???;
info->container_member = ddf->currentconf->vcnum;
info->recovery_start = MaxSector;
info->resync_start = 0;
info->reshape_active = 0;
+ info->recovery_blocked = 0;
if (!(ddf->virt->entries[info->container_member].state
& DDF_state_inconsistent) &&
(ddf->virt->entries[info->container_member].init_state
struct phys_disk_entry *pde;
unsigned int n, i;
struct stat stb;
+ __u32 *tptr;
if (ddf->currentconf) {
add_to_super_ddf_bvd(st, dk, fd, devname);
tm = localtime(&now);
sprintf(dd->disk.guid, "%8s%04d%02d%02d",
T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
- *(__u32*)(dd->disk.guid + 16) = random32();
- *(__u32*)(dd->disk.guid + 20) = random32();
+ tptr = (__u32 *)(dd->disk.guid + 16);
+ *tptr++ = random32();
+ *tptr = random32();
do {
/* Cannot be bothered finding a CRC of some irrelevant details*/
return 0;
}
+static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
+{
+ struct ddf_super *ddf = st->sb;
+ struct dl *dl;
+
+ /* mdmon has noticed that this disk (dk->major/dk->minor) has
+ * disappeared from the container.
+ * We need to arrange that it disappears from the metadata and
+ * internal data structures too.
+ * Most of the work is done by ddf_process_update which edits
+ * the metadata and closes the file handle and attaches the memory
+ * where free_updates will free it.
+ */
+ for (dl = ddf->dlist; dl ; dl = dl->next)
+ if (dl->major == dk->major &&
+ dl->minor == dk->minor)
+ break;
+ if (!dl)
+ return -1;
+
+ if (st->update_tail) {
+ int len = (sizeof(struct phys_disk) +
+ sizeof(struct phys_disk_entry));
+ struct phys_disk *pd;
+
+ pd = malloc(len);
+ pd->magic = DDF_PHYS_RECORDS_MAGIC;
+ pd->used_pdes = __cpu_to_be16(dl->pdnum);
+ pd->entries[0].state = __cpu_to_be16(DDF_Missing);
+ append_metadata_update(st, pd, len);
+ }
+ return 0;
+}
+
/*
* This is the write_init_super method for a ddf container. It is
* called when creating a container or adding another device to a
* container.
*/
-
-static unsigned char null_conf[4096+512];
+#define NULL_CONF_SZ 4096
static int __write_init_super_ddf(struct supertype *st)
{
int attempts = 0;
int successes = 0;
unsigned long long size, sector;
+ char *null_aligned;
+
+ if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
+ return -ENOMEM;
+ }
+ memset(null_aligned, 0xff, NULL_CONF_SZ);
/* try to write updated metadata,
* if we catch a failure move on to the next disk
if (write(fd, &c->conf, conf_size) < 0)
break;
} else {
- char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
- if (null_conf[0] != 0xff)
- memset(null_conf, 0xff, sizeof(null_conf));
unsigned int togo = conf_size;
- while (togo > sizeof(null_conf)-512) {
- if (write(fd, null_aligned, sizeof(null_conf)-512) < 0)
+ while (togo > NULL_CONF_SZ) {
+ if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
break;
- togo -= sizeof(null_conf)-512;
+ togo -= NULL_CONF_SZ;
}
if (write(fd, null_aligned, togo) < 0)
break;
continue;
successes++;
}
+ free(null_aligned);
return attempts != successes;
}
continue;
/* This is bigger than 'size', see if there are enough */
cnt = 0;
- for (dl2 = dl; dl2 ; dl2=dl2->next)
+ for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
if (dl2->esize >= dl->esize)
cnt++;
if (cnt >= raiddisks)
if (chunk && *chunk == UnSet)
*chunk = DEFAULT_CHUNK;
-
+ if (level == -1000000) level = LEVEL_CONTAINER;
if (level == LEVEL_CONTAINER) {
/* Must be a fresh device to add to a container */
return validate_geometry_ddf_container(st, level, layout,
unsigned int j;
struct mdinfo *this;
char *ep;
+ __u32 *cptr;
if (subarray &&
(strtoul(subarray, &ep, 10) != vc->vcnum ||
this->array.md_minor = -1;
this->array.major_version = -1;
this->array.minor_version = -2;
- this->array.ctime = DECADE +
- __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+ cptr = (__u32 *)(vc->conf.guid + 16);
+ this->array.ctime = DECADE + __be32_to_cpu(*cptr);
this->array.utime = DECADE +
__be32_to_cpu(vc->conf.timestamp);
this->array.chunk_size = 512 << vc->conf.chunk_shift;
for (i = 0 ; i < ddf->mppe ; i++) {
struct mdinfo *dev;
struct dl *d;
+ int stt;
+ int pd;
if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
continue;
+ for (pd = __be16_to_cpu(ddf->phys->used_pdes);
+ pd--;)
+ if (ddf->phys->entries[pd].refnum
+ == vc->conf.phys_refnum[i])
+ break;
+ if (pd < 0)
+ continue;
+
+ stt = __be16_to_cpu(ddf->phys->entries[pd].state);
+ if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+ != DDF_Online)
+ continue;
+
this->array.working_disks++;
for (d = ddf->dlist; d ; d=d->next)
return consistent;
}
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
/*
* The state of each disk is stored in the global phys_disk structure
* in phys_disk.entries[n].state.
struct vd_config *vc = find_vdcr(ddf, inst);
int pd = find_phys(ddf, vc->phys_refnum[n]);
int i, st, working;
+ struct mdinfo *mdi;
+ struct dl *dl;
if (vc == NULL) {
dprintf("ddf: cannot find instance %d!!\n", inst);
return;
}
- if (pd < 0) {
- /* disk doesn't currently exist. If it is now in_sync,
- * insert it. */
+ /* Find the matching slot in 'info'. */
+ for (mdi = a->info.devs; mdi; mdi = mdi->next)
+ if (mdi->disk.raid_disk == n)
+ break;
+ if (!mdi)
+ return;
+
+ /* and find the 'dl' entry corresponding to that. */
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (mdi->state_fd >= 0 &&
+ mdi->disk.major == dl->major &&
+ mdi->disk.minor == dl->minor)
+ break;
+ if (!dl)
+ return;
+
+ if (pd < 0 || pd != dl->pdnum) {
+ /* disk doesn't currently exist or has changed.
+ * If it is now in_sync, insert it. */
if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
- /* Find dev 'n' in a->info->devs, determine the
- * ddf refnum, and set vc->phys_refnum and update
- * phys->entries[]
- */
- /* FIXME */
+ struct vcl *vcl;
+ pd = dl->pdnum;
+ vc->phys_refnum[n] = dl->disk.refnum;
+ vcl = container_of(vc, struct vcl, conf);
+ vcl->lba_offset[n] = mdi->data_offset;
+ ddf->phys->entries[pd].type &=
+ ~__cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[pd].type |=
+ __cpu_to_be16(DDF_Active_in_VD);
+ ddf->updates_pending = 1;
}
} else {
int old = ddf->phys->entries[pd].state;
case DDF_RAID1:
if (working == 0)
state = DDF_state_failed;
+ else if (working == 2 && state == DDF_state_degraded)
+ state = DDF_state_part_optimal;
break;
case DDF_RAID4:
case DDF_RAID5:
* our actions.
* Possible update are:
* DDF_PHYS_RECORDS_MAGIC
- * Add a new physical device. Changes to this record
- * only happen implicitly.
+ * Add a new physical device or remove an old one.
+ * Changes to this record only happen implicitly.
* used_pdes is the device number.
* DDF_VIRT_RECORDS_MAGIC
* Add a new VD. Possibly also change the 'access' bits.
struct dl *dl;
unsigned int mppe;
unsigned int ent;
+ unsigned int pdnum, pd2;
dprintf("Process update %x\n", *magic);
ent = __be16_to_cpu(pd->used_pdes);
if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
return;
+ if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
+ struct dl **dlp;
+ /* removing this disk. */
+ ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
+ for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
+ struct dl *dl = *dlp;
+ if (dl->pdnum == (signed)ent) {
+ close(dl->fd);
+ dl->fd = -1;
+ /* FIXME this doesn't free
+ * dl->devname */
+ update->space = dl;
+ *dlp = dl->next;
+ break;
+ }
+ }
+ ddf->updates_pending = 1;
+ return;
+ }
if (!all_ff(ddf->phys->entries[ent].guid))
return;
ddf->phys->entries[ent] = pd->entries[0];
memcpy(&vcl->conf, vc, update->len);
vcl->lba_offset = (__u64*)
&vcl->conf.phys_refnum[mppe];
+ for (ent = 0;
+ ent < __be16_to_cpu(ddf->virt->populated_vdes);
+ ent++)
+ if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
+ DDF_GUID_LEN) == 0) {
+ vcl->vcnum = ent;
+ break;
+ }
ddf->conflist = vcl;
}
+ /* Set DDF_Transition on all Failed devices - to help
+ * us detect those that are no longer in use
+ */
+ for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+ if (ddf->phys->entries[pdnum].state
+ & __be16_to_cpu(DDF_Failed))
+ ddf->phys->entries[pdnum].state
+ |= __be16_to_cpu(DDF_Transition);
/* Now make sure vlist is correct for each dl. */
for (dl = ddf->dlist; dl; dl = dl->next) {
unsigned int dn;
unsigned int vn = 0;
+ int in_degraded = 0;
for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
for (dn=0; dn < ddf->mppe ; dn++)
if (vcl->conf.phys_refnum[dn] ==
dl->disk.refnum) {
+ int vstate;
dprintf("dev %d has %p at %d\n",
dl->pdnum, vcl, vn);
+ /* Clear the Transition flag */
+ if (ddf->phys->entries[dl->pdnum].state
+ & __be16_to_cpu(DDF_Failed))
+ ddf->phys->entries[dl->pdnum].state &=
+ ~__be16_to_cpu(DDF_Transition);
+
dl->vlist[vn++] = vcl;
+ vstate = ddf->virt->entries[vcl->vcnum].state
+ & DDF_state_mask;
+ if (vstate == DDF_state_degraded ||
+ vstate == DDF_state_part_optimal)
+ in_degraded = 1;
break;
}
while (vn < ddf->max_part)
if (dl->vlist[0]) {
ddf->phys->entries[dl->pdnum].type &=
~__cpu_to_be16(DDF_Global_Spare);
- ddf->phys->entries[dl->pdnum].type |=
- __cpu_to_be16(DDF_Active_in_VD);
+ if (!(ddf->phys->entries[dl->pdnum].type &
+ __cpu_to_be16(DDF_Active_in_VD))) {
+ ddf->phys->entries[dl->pdnum].type |=
+ __cpu_to_be16(DDF_Active_in_VD);
+ if (in_degraded)
+ ddf->phys->entries[dl->pdnum].state |=
+ __cpu_to_be16(DDF_Rebuilding);
+ }
}
if (dl->spare) {
ddf->phys->entries[dl->pdnum].type &=
DDF_Active_in_VD);
}
}
+
+ /* Now remove any 'Failed' devices that are not part
+ * of any VD. They will have the Transition flag set.
+ * Once done, we need to update all dl->pdnum numbers.
+ */
+ pd2 = 0;
+ for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+ if ((ddf->phys->entries[pdnum].state
+ & __be16_to_cpu(DDF_Failed))
+ && (ddf->phys->entries[pdnum].state
+ & __be16_to_cpu(DDF_Transition)))
+ /* skip this one */;
+ else if (pdnum == pd2)
+ pd2++;
+ else {
+ ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (dl->pdnum == (int)pdnum)
+ dl->pdnum = pd2;
+ pd2++;
+ }
+ ddf->phys->used_pdes = __cpu_to_be16(pd2);
+ while (pd2 < pdnum) {
+ memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
+ pd2++;
+ }
+
ddf->updates_pending = 1;
break;
case DDF_SPARE_ASSIGN_MAGIC:
unsigned int j;
/* If in this array, skip */
for (d2 = a->info.devs ; d2 ; d2 = d2->next)
- if (d2->disk.major == dl->major &&
+ if (d2->state_fd >= 0 &&
+ d2->disk.major == dl->major &&
d2->disk.minor == dl->minor) {
dprintf("%x:%x already in array\n", dl->major, dl->minor);
break;
} else if (ddf->phys->entries[dl->pdnum].type &
__cpu_to_be16(DDF_Global_Spare)) {
is_global = 1;
+ } else if (!(ddf->phys->entries[dl->pdnum].state &
+ __cpu_to_be16(DDF_Failed))) {
+ /* we can possibly use some of this */
+ is_global = 1;
}
if ( ! (is_dedicated ||
(is_global && global_ok))) {
esize = ex[j].start - pos;
if (esize >= a->info.component_size)
break;
- pos = ex[i].start + ex[i].size;
- i++;
- } while (ex[i-1].size);
+ pos = ex[j].start + ex[j].size;
+ j++;
+ } while (ex[j-1].size);
free(ex);
if (esize < a->info.component_size) {
- dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+ dprintf("%x:%x has no room: %llu %llu\n",
+ dl->major, dl->minor,
esize, a->info.component_size);
/* No room */
continue;
}
mu->buf = malloc(ddf->conf_rec_len * 512);
- mu->len = ddf->conf_rec_len;
+ mu->len = ddf->conf_rec_len * 512;
+ mu->space = NULL;
+ mu->space_list = NULL;
mu->next = *updates;
vc = find_vdcr(ddf, a->info.container_member);
memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
.validate_geometry = validate_geometry_ddf,
.write_init_super = write_init_super_ddf,
.add_to_super = add_to_super_ddf,
+ .remove_from_super = remove_from_super_ddf,
.load_container = load_container_ddf,
#endif
.match_home = match_home_ddf,