/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2006-2014 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Author: Neil Brown
* Email: <neil@brown.name>
*
- * Specifications for DDF takes from Common RAID DDF Specification Revision 1.2
+ * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
* (July 28 2006). Reused by permission of SNIA.
*/
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND-1)
+/* Default for safe_mode_delay. Same value as for IMSM.
+ * This is the value stored in the safe_mode_delay field of the
+ * mdinfo records filled in for DDF member arrays.
+ * NOTE(review): the unit is not stated here (presumably milliseconds)
+ * - confirm against the consumer of safe_mode_delay.
+ */
+static const int DDF_SAFE_MODE_DELAY = 4000;
+
/* The DDF metadata handling.
* DDF metadata lives at the end of the device.
* The last 512 byte block provides an 'anchor' which is used to locate
be32 refnum;
be16 type;
be16 state;
- be64 config_size; /* DDF structures must be after here */
- char path[18]; /* another horrible structure really */
+ be64 config_size; /* DDF structures must be after here */
+ char path[18]; /* Another horrible structure really
+ * but is "used for information
+ * purposes only" */
__u8 pad[6];
} entries[0];
};
* and reconstructed for writing. This means that we only need
* to make config changes once and they are automatically
* propagated to all devices.
- * Note that the ddf_super has space of the conf and disk data
- * for this disk and also for a list of all such data.
- * The list is only used for the superblock that is being
- * built in Create or Assemble to describe the whole array.
+ * The global (config and disk data) records are each in a list
+ * of separate data structures. When writing we find the entry
+ * or entries applicable to the particular device.
*/
struct ddf_super {
- struct ddf_header anchor, primary, secondary;
+ struct ddf_header anchor, primary, secondary;
struct ddf_controller_data controller;
- struct ddf_header *active;
+ struct ddf_header *active;
struct phys_disk *phys;
struct virtual_disk *virt;
- int pdsize, vdsize;
- unsigned int max_part, mppe, conf_rec_len;
- int currentdev;
- int updates_pending;
+ char *conf;
+ int pdsize, vdsize;
+ unsigned int max_part, mppe, conf_rec_len;
+ int currentdev;
+ int updates_pending;
struct vcl {
union {
char space[512];
struct {
struct vcl *next;
unsigned int vcnum; /* index into ->virt */
+ /* For an array with a secondary level there are
+ * multiple vd_config structures, all with the same
+ * guid but with different sec_elmnt_seq.
+ * One of these structures is in 'conf' below.
+ * The others are in other_bvds, not in any
+ * particular order.
+ */
struct vd_config **other_bvds;
__u64 *block_sizes; /* NULL if all the same */
};
} *dlist, *add_list;
};
+/* Forward declarations of file-local functions that are only declared
+ * when MDASSEMBLE is not defined. They are gathered here so the
+ * functions can be referenced before their definitions.
+ */
+#ifndef MDASSEMBLE
+static int load_super_ddf_all(struct supertype *st, int fd,
+ void **sbp, char *devname);
+static int get_svd_state(const struct ddf_super *, const struct vcl *);
+/* Note: the container variant takes 'chunk' by value ... */
+static int
+validate_geometry_ddf_container(struct supertype *st,
+ int level, int layout, int raiddisks,
+ int chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose);
+
+/* ... while the BVD variant takes a pointer to 'chunk'. */
+static int validate_geometry_ddf_bvd(struct supertype *st,
+ int level, int layout, int raiddisks,
+ int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose);
+#endif
+
+/* Forward declarations of file-local functions that are declared
+ * unconditionally (outside the MDASSEMBLE guard), so that they can be
+ * referenced before their definitions.
+ */
+static void free_super_ddf(struct supertype *st);
+static int all_ff(const char *guid);
+static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
+ be32 refnum, unsigned int nmax,
+ const struct vd_config **bvd,
+ unsigned int *idx);
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
+static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
+static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
+/* The 'name' buffer passed to _ddf_array_name must hold at least 17 bytes. */
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i);
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
+static int init_super_ddf_bvd(struct supertype *st,
+ mdu_array_info_t *info,
+ unsigned long long size,
+ char *name, char *homehost,
+ int *uuid, unsigned long long data_offset);
+
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
#if DEBUG
-static int all_ff(const char *guid);
static void pr_state(struct ddf_super *ddf, const char *msg)
{
unsigned int i;
static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
{
+ if (ddf->updates_pending)
+ return;
ddf->updates_pending = 1;
ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
pr_state(ddf, func);
#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
-static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
- be32 refnum, unsigned int nmax,
- const struct vd_config **bvd,
- unsigned int *idx);
-
static be32 calc_crc(void *buf, int len)
{
/* crcs are always at the same place as in the ddf_header */
newcrc = crc32(0, buf, len);
ddf->crc = oldcrc;
- /* The crc is store (like everything) bigendian, so convert
+ /* The crc is stored (like everything) bigendian, so convert
* here for simplicity
*/
return cpu_to_be32(newcrc);
if (len > 1024)
return NULL;
- if (buf) {
- /* All pre-allocated sections are a single block */
- if (len != 1)
- return NULL;
- } else if (posix_memalign(&buf, 512, len<<9) != 0)
+ if (!buf && posix_memalign(&buf, 512, len<<9) != 0)
buf = NULL;
if (!buf)
super->primary.openflag && !super->secondary.openflag)
)
super->active = &super->secondary;
- } else if (devname)
+ } else if (devname &&
+ be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
pr_err("Failed to load secondary DDF header on %s\n",
devname);
if (super->active == NULL)
unsigned int i;
unsigned int confsec;
int vnum;
- unsigned int max_virt_disks = be16_to_cpu
- (super->active->max_vd_entries);
+ unsigned int max_virt_disks =
+ be16_to_cpu(super->active->max_vd_entries);
unsigned long long dsize;
/* First the local disk info */
if (posix_memalign((void**)&dl, 512,
- sizeof(*dl) +
- (super->max_part) * sizeof(dl->vlist[0])) != 0) {
+ sizeof(*dl) +
+ (super->max_part) * sizeof(dl->vlist[0])) != 0) {
pr_err("%s could not allocate disk info buffer\n",
- __func__);
+ __func__);
return 1;
}
* the conflist
*/
- conf = load_section(fd, super, NULL,
+ conf = load_section(fd, super, super->conf,
super->active->config_section_offset,
super->active->config_section_length,
0);
-
+ super->conf = conf;
vnum = 0;
for (confsec = 0;
confsec < be32_to_cpu(super->active->config_section_length);
if (dl->spare)
continue;
if (posix_memalign((void**)&dl->spare, 512,
- super->conf_rec_len*512) != 0) {
+ super->conf_rec_len*512) != 0) {
pr_err("%s could not allocate spare info buf\n",
__func__);
return 1;
continue;
}
if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
+ /* Must be vendor-unique - I cannot handle those */
continue;
+
for (vcl = super->conflist; vcl; vcl = vcl->next) {
if (memcmp(vcl->conf.guid,
vd->guid, DDF_GUID_LEN) == 0)
continue;
} else {
if (posix_memalign((void**)&vcl, 512,
- (super->conf_rec_len*512 +
- offsetof(struct vcl, conf))) != 0) {
+ (super->conf_rec_len*512 +
+ offsetof(struct vcl, conf))) != 0) {
pr_err("%s could not allocate vcl buf\n",
__func__);
return 1;
if (i < max_virt_disks)
vcl->vcnum = i;
}
- free(conf);
return 0;
}
-#ifndef MDASSEMBLE
-static int load_super_ddf_all(struct supertype *st, int fd,
- void **sbp, char *devname);
-#endif
-
-static void free_super_ddf(struct supertype *st);
-
static int load_super_ddf(struct supertype *st, int fd,
char *devname)
{
if (get_dev_size(fd, devname, &dsize) == 0)
return 1;
- if (!st->ignore_hw_compat && test_partition(fd))
+ if (test_partition(fd))
/* DDF is not allowed on partitions */
return 1;
return;
free(ddf->phys);
free(ddf->virt);
+ free(ddf->conf);
while (ddf->conflist) {
struct vcl *v = ddf->conflist;
ddf->conflist = v->next;
static struct supertype *match_metadata_desc_ddf(char *arg)
{
- /* 'ddf' only support containers */
+ /* 'ddf' only supports containers */
struct supertype *st;
if (strcmp(arg, "ddf") != 0 &&
strcmp(arg, "default") != 0
printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
printf("\n");
printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
- printf(" Redundant hdr : %s\n", be32_eq(sb->secondary.magic,
+ printf(" Redundant hdr : %s\n", (be32_eq(sb->secondary.magic,
DDF_HEADER_MAGIC)
- ?"yes" : "no");
+ ?"yes" : "no"));
examine_vds(sb);
examine_pds(sb);
}
-static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
-
-static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
-static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
-
static unsigned int get_vd_num_of_subarray(struct supertype *st)
{
/*
static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
{
- /* We just write a generic DDF ARRAY entry
+ /* We write a DDF ARRAY member entry for each vd, identifying container
+ * by uuid and member by unit number and uuid.
*/
struct ddf_super *ddf = st->sb;
struct mdinfo info;
struct virtual_entry *ve = &ddf->virt->entries[i];
struct vcl vcl;
char nbuf1[64];
+ char namebuf[17];
if (all_ff(ve->guid))
continue;
memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
ddf->currentconf =&vcl;
+ vcl.vcnum = i;
uuid_from_super_ddf(st, info.uuid);
fname_from_uuid(st, &info, nbuf1, ':');
- printf("ARRAY container=%s member=%d UUID=%s\n",
+ _ddf_array_name(namebuf, ddf, i);
+ printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
+ namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
nbuf+5, i, nbuf1+5);
}
}
printf("MD_METADATA=ddf\n");
printf("MD_LEVEL=container\n");
printf("MD_UUID=%s\n", nbuf+5);
+ printf("MD_DEVICES=%u\n",
+ be16_to_cpu(((struct ddf_super *)st->sb)->phys->used_pdes));
}
static int copy_metadata_ddf(struct supertype *st, int from, int to)
* So it is easiest to find the earliest of primary and
* secondary, and copy everything from there.
*
- * Anchor is 512 from end It contains primary_lba and secondary_lba
+ * Anchor is 512 from end. It contains primary_lba and secondary_lba
* we choose one of those
*/
*/
}
+/* Vendor identifiers for which the virtual disk GUID is not treated as
+ * a reliable array identifier. volume_id_is_reliable() compares each
+ * entry against the first 8 bytes of the controller GUID.
+ * NOTE(review): because 8 bytes are compared, each literal must supply
+ * at least 8 bytes (space padded) - confirm the padding of the entries.
+ */
+static const char *vendors_with_variable_volume_UUID[] = {
+ "LSI ",
+};
+
+/* Report whether the virtual disk GUIDs of this metadata may be used
+ * to identify an array.
+ * Returns 0 when the first 8 bytes of the controller GUID match an
+ * entry of vendors_with_variable_volume_UUID, and 1 otherwise.
+ */
+static int volume_id_is_reliable(const struct ddf_super *ddf)
+{
+	int idx = 0;
+
+	while (idx < (int)ARRAY_SIZE(vendors_with_variable_volume_UUID)) {
+		const char *vendor = vendors_with_variable_volume_UUID[idx++];
+
+		if (memcmp(ddf->controller.guid, vendor, 8) == 0)
+			return 0;
+	}
+	return 1;
+}
+
+/* Compute the md UUID of the subarray (virtual disk) with index vcnum.
+ *
+ * ddf:   the loaded DDF metadata
+ * vcnum: index into ddf->virt->entries
+ * uuid:  receives the 4-word UUID
+ *
+ * When volume_id_is_reliable() says the VD GUID can be trusted, the UUID
+ * is derived from that GUID by uuid_from_ddf_guid(). Otherwise it is the
+ * first 16 bytes of a SHA1 over: header (anchor) GUID, the 16 byte VD
+ * name, and vcnum truncated to 16 bits.
+ */
+static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
+				 unsigned int vcnum, int uuid[4])
+{
+	char buf[DDF_GUID_LEN+18], sha[20], *p;
+	struct sha1_ctx ctx;
+	__u16 vcnum16;
+
+	if (volume_id_is_reliable(ddf)) {
+		uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, uuid);
+		return;
+	}
+	/*
+	 * Some fake RAID BIOSes (in particular, LSI ones) change the
+	 * VD GUID at every boot. These GUIDs are not suitable for
+	 * identifying an array. Luckily the header GUID appears to
+	 * remain constant.
+	 * We construct a pseudo-UUID from the header GUID and those
+	 * properties of the subarray that we expect to remain constant.
+	 */
+	memset(buf, 0, sizeof(buf));
+	p = buf;
+	memcpy(p, ddf->anchor.guid, DDF_GUID_LEN);
+	p += DDF_GUID_LEN;
+	memcpy(p, ddf->virt->entries[vcnum].name, 16);
+	p += 16;
+	/* Store the 16-bit index with memcpy rather than through a cast
+	 * pointer: buf is a char array, so a __u16 store through it has no
+	 * alignment guarantee and breaks the aliasing rules. The bytes
+	 * written are the same (host byte order), so the resulting UUID
+	 * is unchanged.
+	 */
+	vcnum16 = vcnum;
+	memcpy(p, &vcnum16, sizeof(vcnum16));
+	sha1_init_ctx(&ctx);
+	sha1_process_bytes(buf, sizeof(buf), &ctx);
+	sha1_finish_ctx(&ctx, sha);
+	memcpy(uuid, sha, 4*4);
+}
+
static void brief_detail_super_ddf(struct supertype *st)
{
struct mdinfo info;
else if (vcnum == DDF_NOTFOUND)
return;
else
- uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
+ uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
fname_from_uuid(st, &info, nbuf,':');
printf(" UUID=%s", nbuf + 5);
}
{
/* It matches 'this' host if the controller is a
* Linux-MD controller with vendor_data matching
- * the hostname
+ * the hostname. It would be nice if we could
+ * test against controller found in /sys or somewhere...
*/
struct ddf_super *ddf = st->sb;
unsigned int len;
unsigned int *n_bvd)
{
/*
- * Find the index of the n-th valid physical disk in this BVD
+ * Find the index of the n-th valid physical disk in this BVD.
+ * Unused entries can be sprinkled in with the used entries,
+ * but don't count.
*/
unsigned int i, j;
- for (i = 0, j = 0; i < ddf->mppe &&
- j < be16_to_cpu(conf->prim_elmnt_count); i++) {
+ for (i = 0, j = 0;
+ i < ddf->mppe && j < be16_to_cpu(conf->prim_elmnt_count);
+ i++) {
if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
if (n == j) {
*n_bvd = i;
return 0;
}
+/* Given a member array instance number, and a raid disk within that instance,
+ * find the vd_config structure. The offset of the given disk in the phys_refnum
+ * table is returned in n_bvd.
+ * For two-level members with a secondary raid level the vd_config for
+ * the appropriate BVD is returned.
+ * The return value is always &vcl->conf, where vcl is returned in the last pointer.
+ */
static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
unsigned int n,
unsigned int *n_bvd, struct vcl **vcl)
*/
struct ddf_super *ddf = st->sb;
struct vcl *vcl = ddf->currentconf;
- char *guid;
if (vcl)
- guid = vcl->conf.guid;
+ uuid_of_ddf_subarray(ddf, vcl->vcnum, uuid);
else
- guid = ddf->anchor.guid;
- uuid_from_ddf_guid(guid, uuid);
+ uuid_from_ddf_guid(ddf->anchor.guid, uuid);
}
-static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
-
static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
{
struct ddf_super *ddf = st->sb;
info->array.chunk_size = 0;
info->container_enough = 1;
- info->disk.major = 0;
- info->disk.minor = 0;
+ info->disk.major = 0;
+ info->disk.minor = 0;
if (ddf->dlist) {
+ struct phys_disk_entry *pde = NULL;
info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
entries[info->disk.raid_disk].
config_size);
info->component_size = ddf->dlist->size - info->data_offset;
+ if (info->disk.raid_disk >= 0)
+ pde = ddf->phys->entries + info->disk.raid_disk;
+ if (pde &&
+ !(be16_to_cpu(pde->state) & DDF_Failed))
+ info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
+ else
+ info->disk.state = 1 << MD_DISK_FAULTY;
+
+ info->events = be32_to_cpu(ddf->active->seq);
} else {
info->disk.number = -1;
info->disk.raid_disk = -1;
// info->disk.raid_disk = find refnum in the table and use index;
+ info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
}
- info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
info->recovery_start = MaxSector;
info->reshape_active = 0;
int i;
for (i = 0 ; i < map_disks; i++) {
if (i < info->array.raid_disks &&
- (be16_to_cpu(ddf->phys->entries[i].state)
- & DDF_Online) &&
!(be16_to_cpu(ddf->phys->entries[i].state)
& DDF_Failed))
map[i] = 1;
}
}
+/* Copy the 16 byte name of virtual disk entry 'i' into 'name' and
+ * terminate it. Every space within those 16 bytes is replaced by a
+ * NUL, so the resulting string ends at the first space (if any).
+ * The buffer 'name' must be at least 17 bytes long!
+ */
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i)
+{
+	char *c;
+
+	memcpy(name, ddf->virt->entries[i].name, 16);
+	name[16] = 0;
+	for (c = name; c < name + 16; c++) {
+		if (*c == ' ')
+			*c = 0;
+	}
+}
+
static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
{
struct ddf_super *ddf = st->sb;
info->disk.raid_disk = cd + conf->sec_elmnt_seq
* be16_to_cpu(conf->prim_elmnt_count);
info->disk.number = dl->pdnum;
- info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ info->disk.state = 0;
+ if (info->disk.number >= 0 &&
+ (be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Online) &&
+ !(be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Failed))
+ info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ info->events = be32_to_cpu(ddf->active->seq);
}
info->container_member = ddf->currentconf->vcnum;
sprintf(info->text_version, "/%s/%d",
st->container_devnm,
info->container_member);
- info->safe_mode_delay = 200;
+ info->safe_mode_delay = DDF_SAFE_MODE_DELAY;
- memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
- info->name[16]=0;
- for(j=0; j<16; j++)
- if (info->name[j] == ' ')
- info->name[j] = 0;
+ _ddf_array_name(info->name, ddf, info->container_member);
if (map)
for (j = 0; j < map_disks; j++) {
// struct virtual_entry *ve = find_ve(ddf);
/* we don't need to handle "force-*" or "assemble" as
- * there is no need to 'trick' the kernel. We the metadata is
+ * there is no need to 'trick' the kernel. When the metadata is
* first updated to activate the array, all the implied modifications
* will just happen.
*/
}
#endif
-static int init_super_ddf_bvd(struct supertype *st,
- mdu_array_info_t *info,
- unsigned long long size,
- char *name, char *homehost,
- int *uuid, unsigned long long data_offset);
-
static int init_super_ddf(struct supertype *st,
mdu_array_info_t *info,
unsigned long long size, char *name, char *homehost,
* We need to create the entire 'ddf' structure which includes:
* DDF headers - these are easy.
* Controller data - a Sector describing this controller .. not that
- * this is a controller exactly.
+ * this is a controller exactly.
* Physical Disk Record - one entry per device, so
- * leave plenty of space.
+ * leave plenty of space.
* Virtual Disk Records - again, just leave plenty of space.
- * This just lists VDs, doesn't give details
- * Config records - describes the VDs that use this disk
+ * This just lists VDs, doesn't give details.
+ * Config records - describe the VDs that use this disk
* DiskData - describes 'this' device.
* BadBlockManagement - empty
* Diag Space - empty
return 0;
}
memset(ddf, 0, sizeof(*ddf));
- ddf->dlist = NULL; /* no physical disks yet */
- ddf->conflist = NULL; /* No virtual disks yet */
st->sb = ddf;
if (info == NULL) {
* start 32MB from the end, and put the primary header there.
* Don't do secondary for now.
* We don't know exactly where that will be yet as it could be
- * different on each device. To just set up the lengths.
- *
+ * different on each device. So just set up the lengths.
*/
ddf->anchor.magic = DDF_HEADER_MAGIC;
ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
/* Put this at bottom of 32M reserved.. */
ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
- max_phys_disks = 1023; /* Should be enough */
+ max_phys_disks = 1023; /* Should be enough, 4095 is also allowed */
ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
- max_virt_disks = 255;
- ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks); /* ?? */
- ddf->anchor.max_partitions = cpu_to_be16(64); /* ?? */
+ max_virt_disks = 255; /* 15, 63, 255, 1024, 4095 are all allowed */
+ ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks);
ddf->max_part = 64;
- ddf->mppe = 256;
+ ddf->anchor.max_partitions = cpu_to_be16(ddf->max_part);
+ ddf->mppe = 256; /* 16, 64, 256, 1024, 4096 are all allowed */
ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
memset(ddf->anchor.pad3, 0xff, 54);
- /* controller sections is one sector long immediately
+ /* Controller section is one sector long immediately
* after the ddf header */
sector = 1;
ddf->anchor.controller_section_offset = cpu_to_be32(sector);
static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
{
- /* find a list of used extents on the give physical device
+ /* Find a list of used extents on the given physical device
* (dnum) of the given ddf.
* Return a malloced array of 'struct extent'
-
- * FIXME ignore DDF_Legacy devices?
-
*/
struct extent *rv;
int n = 0;
unsigned int i;
+ __u16 state = be16_to_cpu(ddf->phys->entries[dl->pdnum].state);
+
+ if ((state & (DDF_Online|DDF_Failed|DDF_Missing)) != DDF_Online)
+ return NULL;
rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
return 1;
}
-
#ifndef MDASSEMBLE
-static int get_svd_state(const struct ddf_super *, const struct vcl *);
-
static void add_to_super_ddf_bvd(struct supertype *st,
mdu_disk_info_t *dk, int fd, char *devname)
{
- /* fd and devname identify a device with-in the ddf container (st).
+ /* fd and devname identify a device within the ddf container (st).
* dk identifies a location in the new BVD.
* We need to find suitable free space in that device and update
* the phys_refnum and lba_offset for the newly created vd_config.
if (ddf->currentconf->block_sizes)
blocks = ddf->currentconf->block_sizes[dk->raid_disk];
+ /* First-fit */
do {
esize = ex[i].start - pos;
if (esize >= blocks)
return DDF_NOTFOUND;
}
-/* add a device to a container, either while creating it or while
+/* Compute and store pde->config_size for the device described by dl.
+ *
+ * The starting value is the smaller of (dl->size - 32*1024*2) and the
+ * primary header LBA; it is lowered to the secondary header LBA when
+ * that is set (not all-ones) and smaller. If the workspace LBA lies
+ * below that value it is used instead, unless the implied workspace
+ * would be larger than both 1024*1024*2 blocks and 1/16 of the device,
+ * in which case the workspace LBA is reported and ignored.
+ */
+static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl)
+{
+	__u64 limit, lba;
+
+	limit = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba));
+
+	lba = be64_to_cpu(dl->secondary_lba);
+	if (lba != ~(__u64)0)
+		limit = min(limit, lba);
+
+	/*
+	 * Some vendor DDF structures interpret workspace_lba
+	 * very differently than we do: Make a sanity check on the value.
+	 */
+	lba = be64_to_cpu(dl->workspace_lba);
+	if (lba < limit) {
+		__u64 wsp = limit - lba;
+
+		if (wsp <= 1024*1024*2ULL || wsp <= dl->size / 16) {
+			limit = lba;
+		} else {
+			pr_err("%s: %x:%x: workspace size 0x%llx too big, ignoring\n",
+			       __func__, dl->major, dl->minor, wsp);
+		}
+	}
+
+	pde->config_size = cpu_to_be64(limit);
+	dprintf("%s: %x:%x config_size %llx, DDF structure is %llx blocks\n",
+		__func__, dl->major, dl->minor, limit, dl->size-limit);
+}
+
+/* Add a device to a container, either while creating it or while
* expanding a pre-existing container
*/
static int add_to_super_ddf(struct supertype *st,
} while (0)
__calc_lba(dd, ddf->dlist, workspace_lba, 32);
__calc_lba(dd, ddf->dlist, primary_lba, 16);
- __calc_lba(dd, ddf->dlist, secondary_lba, 32);
- pde->config_size = dd->workspace_lba;
+ if (ddf->dlist == NULL ||
+ be64_to_cpu(ddf->dlist->secondary_lba) != ~(__u64)0)
+ __calc_lba(dd, ddf->dlist, secondary_lba, 32);
+ _set_config_size(pde, dd);
sprintf(pde->path, "%17.17s","Information: nil") ;
memset(pde->pad, 0xff, 6);
* called when creating a container or adding another device to a
* container.
*/
-#define NULL_CONF_SZ 4096
-static char *null_aligned;
-static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
- int update)
+static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
{
unsigned long long sector;
struct ddf_header *header;
- int fd, i, n_config, conf_size;
+ int fd, i, n_config, conf_size, buf_size;
int ret = 0;
-
- if (null_aligned == NULL) {
- if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
- != 0)
- return 0;
- memset(null_aligned, 0xff, NULL_CONF_SZ);
- }
+ char *conf;
fd = d->fd;
default:
return 0;
}
+ if (sector == ~(__u64)0)
+ return 0;
header->type = type;
header->openflag = 1;
/* Now write lots of config records. */
n_config = ddf->max_part;
conf_size = ddf->conf_rec_len * 512;
+ conf = ddf->conf;
+ buf_size = conf_size * (n_config + 1);
+ if (!conf) {
+ if (posix_memalign((void**)&conf, 512, buf_size) != 0)
+ goto out;
+ ddf->conf = conf;
+ }
for (i = 0 ; i <= n_config ; i++) {
struct vcl *c;
struct vd_config *vdc = NULL;
vdc->sec_elmnt_seq);
vdc->seqnum = header->seq;
vdc->crc = calc_crc(vdc, conf_size);
- if (write(fd, vdc, conf_size) < 0)
- break;
- } else if (!update) {
- unsigned int togo = conf_size;
- while (togo > NULL_CONF_SZ) {
- if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
- break;
- togo -= NULL_CONF_SZ;
- }
- if (write(fd, null_aligned, togo) < 0)
- break;
+ memcpy(conf + i*conf_size, vdc, conf_size);
} else
- lseek(fd, conf_size, SEEK_CUR);
+ memset(conf + i*conf_size, 0xff, conf_size);
}
- if (i <= n_config)
+ if (write(fd, conf, buf_size) != buf_size)
goto out;
d->disk.crc = calc_crc(&d->disk, 512);
return ret;
}
-static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d,
- int update)
+static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
{
unsigned long long size;
int fd = d->fd;
ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
- if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY, update))
+ if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
return 0;
- if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY, update))
+ if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
return 0;
lseek64(fd, (size-1)*512, SEEK_SET);
}
#ifndef MDASSEMBLE
-static int __write_init_super_ddf(struct supertype *st, int update)
+static int __write_init_super_ddf(struct supertype *st)
{
struct ddf_super *ddf = st->sb;
struct dl *d;
*/
for (d = ddf->dlist; d; d=d->next) {
attempts++;
- successes += _write_super_to_disk(ddf, d, update);
+ successes += _write_super_to_disk(ddf, d);
}
return attempts != successes;
struct ddf_super *ddf = st->sb;
struct vcl *currentconf = ddf->currentconf;
- /* we are done with currentconf reset it to point st at the container */
+ /* We are done with currentconf - reset it so st refers to the container */
ddf->currentconf = NULL;
if (st->update_tail) {
unsigned int i;
if (!currentconf) {
+ /* Must be adding a physical disk to the container */
int len = (sizeof(struct phys_disk) +
sizeof(struct phys_disk_entry));
if (!currentconf)
for (d = ddf->dlist; d; d=d->next)
while (Kill(d->devname, NULL, 0, -1, 1) == 0);
- return __write_init_super_ddf(st, 0);
+ return __write_init_super_ddf(st);
}
}
{
/* Find 'raiddisks' spare extents at least 'size' big (but
* only caring about multiples of 'chunk') and remember
- * them.
- * If the cannot be found, fail.
+ * them. If size==0, find the largest size possible.
+ * Report available size in *freesize
+ * If space cannot be found, fail.
*/
struct dl *dl;
struct ddf_super *ddf = st->sb;
return 1;
}
-static int
-validate_geometry_ddf_container(struct supertype *st,
- int level, int layout, int raiddisks,
- int chunk, unsigned long long size,
- unsigned long long data_offset,
- char *dev, unsigned long long *freesize,
- int verbose);
-
-static int validate_geometry_ddf_bvd(struct supertype *st,
- int level, int layout, int raiddisks,
- int *chunk, unsigned long long size,
- unsigned long long data_offset,
- char *dev, unsigned long long *freesize,
- int verbose);
-
static int validate_geometry_ddf(struct supertype *st,
int level, int layout, int raiddisks,
int *chunk, unsigned long long size,
if (*chunk == UnSet)
*chunk = DEFAULT_CHUNK;
- if (level == -1000000) level = LEVEL_CONTAINER;
+ if (level == LEVEL_NONE)
+ level = LEVEL_CONTAINER;
if (level == LEVEL_CONTAINER) {
/* Must be a fresh device to add to a container */
return validate_geometry_ddf_container(st, level, layout,
if (!dev) {
mdu_array_info_t array = {
- .level = level, .layout = layout,
+ .level = level,
+ .layout = layout,
.raid_disks = raiddisks
};
struct vd_config conf;
close(fd);
if (sra && sra->array.major_version == -1 &&
strcmp(sra->text_version, "ddf") == 0) {
-
/* load super */
/* find space for 'n' devices. */
/* remember the devices */
int dcnt = 0;
if (minsize == 0)
minsize = 8;
- for (dl = ddf->dlist; dl ; dl = dl->next)
- {
+ for (dl = ddf->dlist; dl ; dl = dl->next) {
int found = 0;
pos = 0;
e = get_extents(ddf, dl);
maxsize = 0;
i = 0;
- if (e) do {
+ if (e)
+ do {
unsigned long long esize;
esize = e[i].start - pos;
if (esize >= maxsize)
for (i = 0, j = 0 ; i < nmax ; i++) {
/* j counts valid entries for this BVD */
- if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
- j++;
if (be32_eq(vc->conf.phys_refnum[i], refnum)) {
*bvd = &vc->conf;
*idx = i;
- return sec * cnt + j - 1;
+ return sec * cnt + j;
}
+ if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
+ j++;
}
if (vc->other_bvds == NULL)
goto bad;
if (sec == DDF_UNUSED_BVD)
continue;
for (i = 0, j = 0 ; i < nmax ; i++) {
- if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
- j++;
if (be32_eq(vd->phys_refnum[i], refnum)) {
*bvd = vd;
*idx = i;
- return sec * cnt + j - 1;
+ return sec * cnt + j;
}
+ if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
+ j++;
}
}
bad:
struct mdinfo *rest = NULL;
struct vcl *vc;
- for (vc = ddf->conflist ; vc ; vc=vc->next)
- {
+ for (vc = ddf->conflist ; vc ; vc=vc->next) {
unsigned int i;
- unsigned int j;
struct mdinfo *this;
char *ep;
__u32 *cptr;
this->array.md_minor = -1;
this->array.major_version = -1;
this->array.minor_version = -2;
+ this->safe_mode_delay = DDF_SAFE_MODE_DELAY;
cptr = (__u32 *)(vc->conf.guid + 16);
this->array.ctime = DECADE + __be32_to_cpu(*cptr);
this->array.utime = DECADE +
this->array.state = 1;
this->resync_start = MaxSector;
}
- memcpy(this->name, ddf->virt->entries[i].name, 16);
- this->name[16]=0;
- for(j=0; j<16; j++)
- if (this->name[j] == ' ')
- this->name[j] = 0;
-
+ _ddf_array_name(this->name, ddf, i);
memset(this->uuid, 0, sizeof(this->uuid));
- this->component_size = be64_to_cpu(vc->conf.blocks);
- this->array.size = this->component_size / 2;
- this->container_member = i;
+ this->component_size = be64_to_cpu(vc->conf.blocks);
+ this->array.size = this->component_size / 2;
+ this->container_member = i;
ddf->currentconf = vc;
uuid_from_super_ddf(st, this->uuid);
continue;
dev = xcalloc(1, sizeof(*dev));
- dev->next = this->devs;
- this->devs = dev;
+ dev->next = this->devs;
+ this->devs = dev;
dev->disk.number = be32_to_cpu(d->disk.refnum);
- dev->disk.major = d->major;
- dev->disk.minor = d->minor;
+ dev->disk.major = d->major;
+ dev->disk.minor = d->minor;
dev->disk.raid_disk = i;
- dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
dev->recovery_start = MaxSector;
- dev->events = be32_to_cpu(ddf->primary.seq);
+ dev->events = be32_to_cpu(ddf->active->seq);
dev->data_offset =
be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
dev->component_size = be64_to_cpu(bvd->blocks);
}
ofd = dl->fd;
dl->fd = fd;
- ret = (_write_super_to_disk(ddf, dl, 0) != 1);
+ ret = (_write_super_to_disk(ddf, dl) != 1);
dl->fd = ofd;
return ret;
}
/*
* return:
* 0 same, or first was empty, and second was copied
- * 1 second had wrong number
+ * 1 second had wrong magic number - but that isn't possible
* 2 wrong uuid
* 3 wrong other info
*/
if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
return 2;
- if (!be32_eq(first->anchor.seq, second->anchor.seq)) {
- dprintf("%s: sequence number mismatch %u/%u\n", __func__,
- be32_to_cpu(first->anchor.seq),
- be32_to_cpu(second->anchor.seq));
- return 3;
- }
if (first->max_part != second->max_part ||
!be16_eq(first->phys->used_pdes, second->phys->used_pdes) ||
!be16_eq(first->virt->populated_vdes,
return 3;
}
- max_pds = be16_to_cpu(first->phys->used_pdes);
+ max_pds = be16_to_cpu(first->phys->used_pdes);
for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
for (pd = 0; pd < max_pds; pd++)
if (be32_eq(first->phys->entries[pd].refnum,
/* FIXME should I look at anything else? */
/*
- At this point we are fairly sure that the meta data matches.
- But the new disk may contain additional local data.
- Add it to the super block.
+ * At this point we are fairly sure that the meta data matches.
+ * But the new disk may contain additional local data.
+ * Add it to the super block.
*/
for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
for (vl1 = first->conflist; vl1; vl1 = vl1->next)
{
struct ddf_super *ddf = c->sb;
int n = atoi(inst);
+ struct mdinfo *dev;
+ struct dl *dl;
+ static const char faulty[] = "faulty";
+
if (all_ff(ddf->virt->entries[n].guid)) {
pr_err("%s: subarray %d doesn't exist\n", __func__, n);
return -ENODEV;
}
- dprintf("ddf: open_new %d\n", n);
+ dprintf("%s: new subarray %d, GUID: %s\n", __func__, n,
+ guid_str(ddf->virt->entries[n].guid));
+ for (dev = a->info.devs; dev; dev = dev->next) {
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (dl->major == dev->disk.major &&
+ dl->minor == dev->disk.minor)
+ break;
+ if (!dl) {
+ pr_err("%s: device %d/%d of subarray %d not found in meta data\n",
+ __func__, dev->disk.major, dev->disk.minor, n);
+ return -1;
+ }
+ if ((be16_to_cpu(ddf->phys->entries[dl->pdnum].state) &
+ (DDF_Online|DDF_Missing|DDF_Failed)) != DDF_Online) {
+ pr_err("%s: new subarray %d contains broken device %d/%d (%02x)\n",
+ __func__, n, dl->major, dl->minor,
+ be16_to_cpu(
+ ddf->phys->entries[dl->pdnum].state));
+ if (write(dev->state_fd, faulty, sizeof(faulty)-1) !=
+ sizeof(faulty) - 1)
+ pr_err("Write to state_fd failed\n");
+ dev->curr_state = DS_FAULTY;
+ }
+ }
a->info.container_member = n;
return 0;
}
+static void handle_missing(struct ddf_super *ddf, int inst)
+{
+	/* This member array is being activated. If any devices
+	 * are missing they must now be marked as failed.
+	 *
+	 * 'inst' selects the member array.  Its raid-disk slots are
+	 * walked with find_vdcr(), starting at slot 0 and stopping at
+	 * the first slot for which find_vdcr() returns NULL.  For each
+	 * slot, ddf->dlist is searched for a device whose refnum equals
+	 * the slot's phys_refnum; when no such device is found, the
+	 * slot's phys_refnum is overwritten with 0.
+	 *
+	 * NOTE(review): this function only rewrites the config record.
+	 * It does not change any phys-disk state and does not flag the
+	 * metadata as needing a write - confirm the caller covers that.
+	 */
+	struct vd_config *vc;
+	unsigned int n_bvd;
+	struct vcl *vcl;
+	struct dl *dl;
+	int n;
+
+	for (n = 0; ; n++) {
+		vc = find_vdcr(ddf, inst, n, &n_bvd, &vcl);
+		if (!vc)
+			break;
+		for (dl = ddf->dlist; dl; dl = dl->next)
+			if (be32_eq(dl->disk.refnum, vc->phys_refnum[n_bvd]))
+				break;
+		if (dl)
+			/* A device with this refnum is in dlist, so it is not missing */
+			continue;
+		vc->phys_refnum[n_bvd] = cpu_to_be32(0);
+	}
+}
+
/*
* The array 'a' is to be marked clean in the metadata.
* If '->resync_start' is not ~(unsigned long long)0, then the array is only
int inst = a->info.container_member;
int old = ddf->virt->entries[inst].state;
if (consistent == 2) {
+ handle_missing(ddf, inst);
/* Should check if a recovery should be started FIXME */
consistent = 1;
if (!is_resync_complete(&a->info))
if (ddf->virt->entries[inst].state !=
((ddf->virt->entries[inst].state & ~DDF_state_mask)
| state)) {
-
ddf->virt->entries[inst].state =
(ddf->virt->entries[inst].state & ~DDF_state_mask)
| state;
static void ddf_sync_metadata(struct supertype *st)
{
-
/*
* Write all data to all devices.
* Later, we might be able to track whether only local changes
if (!ddf->updates_pending)
return;
ddf->updates_pending = 0;
- __write_init_super_ddf(st, 1);
+ __write_init_super_ddf(st);
dprintf("ddf: sync_metadata\n");
}
*/
struct vcl *victim = ddf->currentconf;
struct vd_config *conf;
- ddf->currentconf = NULL;
unsigned int vdnum;
+
+ ddf->currentconf = NULL;
if (!victim) {
pr_err("%s: nothing to kill\n", __func__);
return -1;
dprintf("Process update %x\n", be32_to_cpu(*magic));
if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
-
if (update->len != (sizeof(struct phys_disk) +
sizeof(struct phys_disk_entry)))
return;
a->check_degraded = 1;
}
} else if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
-
if (update->len != (sizeof(struct virtual_disk) +
sizeof(struct virtual_entry)))
return;
if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
return;
} else {
-
ent = find_vde_by_guid(ddf, vd->entries[0].guid);
if (ent != DDF_NOTFOUND) {
dprintf("%s: VD %s exists already in slot %d\n",
vstate = ddf->virt->entries[vcl->vcnum].state
& DDF_state_mask;
if (vstate == DDF_state_degraded ||
- vstate == DDF_state_part_optimal)
+ vstate == DDF_state_part_optimal)
in_degraded = 1;
}
while (vn < ddf->max_part)
*/
pd2 = 0;
for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->used_pdes);
- pdnum++)
+ pdnum++) {
if (be16_and(ddf->phys->entries[pdnum].state,
cpu_to_be16(DDF_Failed))
&& be16_and(ddf->phys->entries[pdnum].state,
- cpu_to_be16(DDF_Transition)))
- /* skip this one */;
- else if (pdnum == pd2)
+ cpu_to_be16(DDF_Transition))) {
+			/* skip this one unless it is still in dlist */
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (dl->pdnum == (int)pdnum)
+ break;
+ if (!dl)
+ continue;
+ }
+ if (pdnum == pd2)
pd2++;
else {
ddf->phys->entries[pd2] =
dl->pdnum = pd2;
pd2++;
}
+ }
ddf->phys->used_pdes = cpu_to_be16(pd2);
while (pd2 < pdnum) {
memset(ddf->phys->entries[pd2].guid, 0xff,
* arrange for their inclusion.
* We only choose devices which are not already in the array,
* and prefer those with a spare-assignment to this array.
- * otherwise we choose global spares - assuming always that
+ * Otherwise we choose global spares - assuming always that
* there is enough room.
* For each spare that we assign, we return an 'mdinfo' which
* describes the position for the device in the array.
* Create a metadata_update record to update the
* phys_refnum and lba_offset values
*/
- vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
+ vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
&n_bvd, &vcl);
if (vc == NULL)
return NULL;