#include <dirent.h>
#include <ctype.h>
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+ int mdfd, char **availp,
struct mdinfo *info);
static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
int number, __u64 events, int verbose,
char *array_name);
static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target,
struct supertype *st, int verbose);
static int Incremental_container(struct supertype *st, char *devname,
*/
struct stat stb;
struct mdinfo info, dinfo;
+ struct mdinfo *sra = NULL, *d;
struct mddev_ident *match;
char chosen_name[1024];
int rv = 1;
struct map_ent *mp, *map = NULL;
int dfd = -1, mdfd = -1;
- char *avail;
+ char *avail = NULL;
int active_disks;
int trustworthy;
char *name_to_use;
mdu_array_info_t ainf;
struct dev_policy *policy = NULL;
+ struct map_ent target_array;
+ int have_target;
struct createinfo *ci = conf_get_create_info();
if (must_be_container(dfd)) {
if (!st)
st = super_by_fd(dfd, NULL);
- if (st)
+ if (st && st->ss->load_container)
rv = st->ss->load_container(st, dfd, NULL);
close(dfd);
dinfo.disk.minor = minor(stb.st_rdev);
policy = disk_policy(&dinfo);
+ have_target = policy_check_path(&dinfo, &target_array);
if (st == NULL && (st = guess_super(dfd)) == NULL) {
if (verbose >= 0)
fprintf(stderr, Name
": no recognisable superblock on %s.\n",
devname);
- rv = try_spare(devname, &dfd, policy, st, verbose);
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, verbose);
goto out;
}
if (st->ss->compare_super == NULL ||
if (verbose >= 0)
fprintf(stderr, Name ": no RAID superblock on %s.\n",
devname);
- rv = try_spare(devname, &dfd, policy, st, verbose);
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, verbose);
free(st);
goto out;
}
mdfd = -1;
if (mdfd < 0) {
- struct mdinfo *sra;
/* Couldn't find an existing array, maybe make a new one */
mdfd = create_mddev(match ? match->devname : NULL,
rv = 2;
goto out;
}
- sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
/* It really should be 'none' - must be old buggy
* kernel, and mdadm -I may not be able to complete.
goto out;
}
info.array.working_disks = 1;
- sysfs_free(sra);
/* 6/ Make sure /var/run/mdadm.map contains this array. */
map_update(&map, fd2devnum(mdfd),
info.text_version,
char dn[20];
int dfd2;
int err;
- struct mdinfo *sra;
struct supertype *st2;
struct mdinfo info2, *d;
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
if (mp->path)
strcpy(chosen_name, mp->path);
else
goto out;
}
}
- sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
if (!sra) {
rv = 2;
goto out;
chosen_name, info.array.working_disks);
wait_for(chosen_name, mdfd);
close(mdfd);
+ sysfs_free(sra);
rv = Incremental(chosen_name, verbose, runstop,
NULL, homehost, require_homehost, autof);
if (rv == 1)
rv = 0;
return rv;
}
- avail = NULL;
- active_disks = count_active(st, mdfd, &avail, &info);
+
+ /* We have added something to the array, so need to re-read the
+ * state. Eventually this state should be kept up-to-date as
+ * things change.
+ */
+ sysfs_free(sra);
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+ active_disks = count_active(st, sra, mdfd, &avail, &info);
if (enough(info.array.level, info.array.raid_disks,
info.array.layout, info.array.state & 1,
avail, active_disks) == 0) {
- free(avail);
if (verbose >= 0)
fprintf(stderr, Name
": %s attached to %s, not enough to start (%d).\n",
rv = 0;
goto out;
}
- free(avail);
/* 7b/ if yes, */
/* - if number of OK devices match expected, or -R and there */
map_unlock(&map);
if (runstop > 0 || active_disks >= info.array.working_disks) {
- struct mdinfo *sra, *dsk;
+ struct mdinfo *dsk;
/* Let's try to start it */
if (match && match->bitmap_file) {
int bmfd = open(match->bitmap_file, O_RDWR);
}
close(bmfd);
}
- /* GET_* needed so add_disk works below */
- sra = sysfs_read(mdfd, fd2devnum(mdfd),
- GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+ /* Need to remove from the array any devices which
+ * 'count_active' discerned were too old or inappropriate
+ */
+ for (d = sra ? sra->devs : NULL ; d ; d = d->next)
+ if (d->disk.state & (1<<MD_DISK_REMOVED))
+ remove_disk(mdfd, st, sra, d);
+
if ((sra == NULL || active_disks >= info.array.working_disks)
&& trustworthy != FOREIGN)
rv = ioctl(mdfd, RUN_ARRAY, NULL);
rv = 0;
}
out:
+ free(avail);
if (dfd >= 0)
close(dfd);
if (mdfd >= 0)
close(mdfd);
if (policy)
dev_policy_free(policy);
+ if (sra)
+ sysfs_free(sra);
return rv;
}
}
}
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+ int mdfd, char **availp,
struct mdinfo *bestinfo)
{
/* count how many devices in sra think they are active */
struct mdinfo *d;
- int cnt = 0, cnt1 = 0;
+ int cnt = 0;
__u64 max_events = 0;
- struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
char *avail = NULL;
+ int *best;
+ char *devmap = NULL;
+ int numdevs = 0;
+ int devnum;
+ int b, i;
+ int raid_disks = 0;
if (!sra)
return 0;
- for (d = sra->devs ; d ; d = d->next) {
+ for (d = sra->devs ; d ; d = d->next)
+ numdevs++;
+ for (d = sra->devs, devnum=0 ; d ; d = d->next, devnum++) {
char dn[30];
int dfd;
int ok;
close(dfd);
if (ok != 0)
continue;
- st->ss->getinfo_super(st, &info, NULL);
+ info.array.raid_disks = raid_disks;
+ st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
if (!avail) {
- avail = malloc(info.array.raid_disks);
+ raid_disks = info.array.raid_disks;
+ avail = calloc(raid_disks, 1);
if (!avail) {
fprintf(stderr, Name ": out of memory.\n");
exit(1);
}
- memset(avail, 0, info.array.raid_disks);
*availp = avail;
+
+ best = calloc(raid_disks, sizeof(int));
+ devmap = calloc(raid_disks * numdevs, 1);
+
+ st->ss->getinfo_super(st, &info, devmap);
}
if (info.disk.state & (1<<MD_DISK_SYNC))
cnt++;
max_events = info.events;
avail[info.disk.raid_disk] = 2;
+ best[info.disk.raid_disk] = devnum;
st->ss->getinfo_super(st, bestinfo, NULL);
} else if (info.events == max_events) {
- cnt++;
avail[info.disk.raid_disk] = 2;
+ best[info.disk.raid_disk] = devnum;
} else if (info.events == max_events-1) {
- cnt1++;
- avail[info.disk.raid_disk] = 1;
+ if (avail[info.disk.raid_disk] == 0) {
+ avail[info.disk.raid_disk] = 1;
+ best[info.disk.raid_disk] = devnum;
+ }
} else if (info.events < max_events - 1)
;
else if (info.events == max_events+1) {
int i;
- cnt1 = cnt;
- cnt = 1;
max_events = info.events;
- for (i=0; i<info.array.raid_disks; i++)
+ for (i=0; i < raid_disks; i++)
if (avail[i])
avail[i]--;
avail[info.disk.raid_disk] = 2;
+ best[info.disk.raid_disk] = devnum;
st->ss->getinfo_super(st, bestinfo, NULL);
} else { /* info.events much bigger */
- cnt = 1; cnt1 = 0;
memset(avail, 0, info.disk.raid_disk);
max_events = info.events;
avail[info.disk.raid_disk] = 2;
}
st->ss->free_super(st);
}
- return cnt + cnt1;
+ if (!avail)
+ return 0;
+ /* We need to reject any device that thinks the best device is
+ * failed or missing */
+ for (b = 0; b < raid_disks; b++)
+ if (avail[b] == 2)
+ break;
+ cnt = 0;
+ for (i = 0 ; i < raid_disks ; i++) {
+ if (i != b && avail[i])
+ if (devmap[raid_disks * best[i] + b] == 0) {
+ /* This device thinks 'b' is failed -
+ * don't use it */
+ devnum = best[i];
+ for (d=sra->devs ; devnum; d = d->next)
+ devnum--;
+ d->disk.state |= (1 << MD_DISK_REMOVED);
+ avail[i] = 0;
+ }
+ if (avail[i])
+ cnt++;
+ }
+ free(best);
+ free(devmap);
+ return cnt;
}
static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target, int bare,
struct supertype *st, int verbose)
{
/* This device doesn't have any md metadata
- * If it is 'bare' and theh device policy allows 'spare' look for
- * an array or container to attach it to.
+ * The device policy allows 'spare' and if !bare, it allows spare-same-slot.
+ * If 'st' is not set, then we only know that some metadata allows this,
+ * others possibly don't.
+ * So look for a container or array to attach the device to.
+ * Prefer 'target' if that is set and the array is found.
+ *
* If st is set, then only arrays of that type are considered
* Return 0 on success, or some exit code on failure, probably 1.
*/
for(i=0; !st2 && superlist[i]; i++)
st2 = superlist[i]->match_metadata_desc(
sra->text_version);
+ if (!st2) {
+ if (verbose > 1)
+ fprintf(stderr, Name ": not adding %s to %s"
+ " as metadata not recognised.\n",
+ devname, mp->path);
+ goto next;
+ }
+ /* Need to double check the 'act_spare' permissions applies
+ * to this metadata.
+ */
+ if (!policy_action_allows(pol, st2->ss->name, act_spare))
+ goto next;
+ if (!bare && !policy_action_allows(pol, st2->ss->name,
+ act_spare_same_slot))
+ goto next;
} else
st2 = st;
get_dev_size(dfd, NULL, &devsize);
goto next;
}
+ /* test against target.
+ * If 'target' is set and 'bare' is false, we only accept
+ * arrays/containers that match 'target'.
+ * If 'target' is set and 'bare' is true, we prefer the
+ * array which matches 'target'.
+ */
+ if (target) {
+ if (strcmp(target->metadata, mp->metadata) == 0 &&
+ memcmp(target->uuid, mp->uuid,
+ sizeof(target->uuid)) == 0) {
+ /* This is our target!! */
+ if (chosen)
+ sysfs_free(chosen);
+ chosen = sra;
+ sra = NULL;
+ /* skip to end so we don't check any more */
+ while (mp->next)
+ mp = mp->next;
+ goto next;
+ }
+ /* not our target */
+ if (!bare)
+ goto next;
+ }
+
/* all tests passed, OK to add to this array */
if (!chosen) {
chosen = sra;
close(dfd);
*dfdp = -1;
rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
- -1, 0);
+ -1, 0, NULL);
close(mdfd);
}
if (verbose > 0) {
if (domain_test(domlist, pol, st2->ss->name) == 0)
/* Incompatible devices for this metadata type */
goto next;
+ if (!policy_action_allows(pol, st2->ss->name, act_spare))
+ /* Some partition types allow sparing, but not
+ * this one.
+ */
+ goto next;
}
st2->ss->getinfo_super(st2, &info, NULL);
return 0;
}
+/* Decide whether the device open on 'dfd' looks "bare" (unused).
+ *
+ * The first 4096 bytes and the last 4096 bytes of the device are read.
+ * A chunk counts as blank when every byte in it is identical and that
+ * byte is 0x00, 0x5a or 0xff.
+ *
+ * Returns 1 when both chunks are blank.  Returns 0 otherwise, which
+ * includes: a seek or read failing, a short read, or the size left in
+ * 'size' by get_dev_size() being smaller than one 4096-byte chunk.
+ *
+ * The file offset of 'dfd' is moved by this function.
+ */
+static int is_bare(int dfd)
+{
+	unsigned long long size = 0;
+	char bufpad[4096 + 4096];
+	/* Pick a 4096-aligned address inside bufpad which still has 4096
+	 * bytes available after it.
+	 * NOTE(review): alignment is presumably wanted for O_DIRECT style
+	 * reads - confirm against how 'dfd' is opened.
+	 */
+	char *buf = (char*)(((long)bufpad + 4096) & ~4095);
+
+	if (lseek(dfd, 0, SEEK_SET) != 0 ||
+	    read(dfd, buf, 4096) != 4096)
+		return 0;
+
+	/* Only these three fill bytes are accepted as "blank" */
+	if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+		return 0;
+	/* Comparing the buffer with itself shifted by one byte checks
+	 * that all 4096 bytes equal buf[0].
+	 */
+	if (memcmp(buf, buf+1, 4095) != 0)
+		return 0;
+
+	/* OK, first 4K appear blank, try the end. */
+	get_dev_size(dfd, NULL, &size);
+	/* 'size' keeps its initial 0 if get_dev_size() stored nothing.
+	 * Refuse explicitly rather than let 'size - 4096' wrap around
+	 * and depend on lseek() rejecting the resulting offset.
+	 */
+	if (size < 4096)
+		return 0;
+	if (lseek(dfd, size - 4096, SEEK_SET) < 0 ||
+	    read(dfd, buf, 4096) != 4096)
+		return 0;
+
+	if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+		return 0;
+	if (memcmp(buf, buf+1, 4095) != 0)
+		return 0;
+
+	return 1;
+}
/* adding a spare to a regular array is quite different from adding one to
* a set-of-partitions virtual array.
* Arrays are given priority over partitions.
*/
static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target,
struct supertype *st, int verbose)
{
int i;
int rv;
int arrays_ok = 0;
int partitions_ok = 0;
- char bufpad[4096 + 4096];
- char *buf = (char*)(((long)bufpad + 4096) & ~4095);
int dfd = *dfdp;
+ int bare;
- /* Can only add a spare if device has at least one domains */
+ /* Can only add a spare if device has at least one domain */
if (pol_find(pol, pol_domain) == NULL)
return 1;
/* And only if some action allows spares */
if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
return 1;
- /* Now check if the device is bare - we don't add non-bare devices
- * yet even if action=-spare
+ /* Now check if the device is bare.
+ * bare devices can always be added as a spare
+ * non-bare devices can only be added if spare-same-slot is permitted,
+ * and this device is replacing a previous device - in which case 'target'
+ * will be set.
*/
+ if (!is_bare(dfd)) {
+ /* Must have a target and allow same_slot */
+ /* Later - may allow force_spare without target */
+ if (!target ||
+ !policy_action_allows(pol, st?st->ss->name:NULL,
+ act_spare_same_slot)) {
+ if (verbose > 1)
+ fprintf(stderr, Name ": %s is not bare, so not "
+ "considering as a spare\n",
+ devname);
+ return 1;
+ }
+ bare = 0;
+ } else
+ bare = 1;
- if (lseek(dfd, 0, SEEK_SET) != 0 ||
- read(dfd, buf, 4096) != 4096) {
- not_bare:
- if (verbose > 1)
- fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
- devname);
- return 1;
- }
- if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
- goto not_bare;
- if (memcmp(buf, buf+1, 4095) != 0)
- goto not_bare;
-
- /* OK, first 4K appear blank, try the end. */
- if (lseek(dfd, -4096, SEEK_END) < 0 ||
- read(dfd, buf, 4096) != 4096)
- goto not_bare;
-
- if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
- goto not_bare;
- if (memcmp(buf, buf+1, 4095) != 0)
- goto not_bare;
-
- /* This device passes our test for 'is bare'.
- * Let's see what policy allows for such things.
+ /* It might be OK to add this device to an array - need to see
+ * what arrays might be candidates.
*/
if (st) {
/* just try try 'array' or 'partition' based on this metadata */
if (st->ss->add_to_super)
- return array_try_spare(devname, dfdp, pol,
+ return array_try_spare(devname, dfdp, pol, target, bare,
st, verbose);
else
return partition_try_spare(devname, dfdp, pol,
st, verbose);
}
- /* Now see which metadata type support spare */
+ /* No metadata was specified or found so options are open.
+ * Check for whether any array metadata, or any partition metadata
+ * might allow adding the spare. This check just helps to avoid
+ * a more costly scan of all arrays when we can be sure that will
+ * fail.
+ */
for (i = 0; (!arrays_ok || !partitions_ok) && superlist[i] ; i++) {
if (superlist[i]->add_to_super && !arrays_ok &&
policy_action_allows(pol, superlist[i]->name, act_spare))
policy_action_allows(pol, superlist[i]->name, act_spare))
partitions_ok = 1;
}
- rv = 0;
+ rv = 1;
if (arrays_ok)
- rv = array_try_spare(devname, dfdp, pol, st, verbose);
- if (rv == 0 && partitions_ok)
+ rv = array_try_spare(devname, dfdp, pol, target, bare,
+ st, verbose);
+ if (rv != 0 && partitions_ok)
rv = partition_try_spare(devname, dfdp, pol, st, verbose);
return rv;
}
int subfd = open_dev(memb->devnum);
if (subfd >= 0) {
Manage_subdevs(memb->dev, subfd,
- &devlist, verbose, 0);
+ &devlist, verbose, 0,
+ NULL);
close(subfd);
}
}
free_mdstat(mdstat);
} else
- Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+ Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
devlist.disposition = 'r';
- rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+ rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
close(mdfd);
free_mdstat(ent);
return rv;