/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#include "mdadm.h"
#include "md_p.h"
#include <ctype.h>
+static int default_layout(struct supertype *st, int level, int verbose) /* pick a layout when none was given; returns the chosen value */
+{
+ int layout = UnSet; /* UnSet marks "nothing chosen yet" for the check below */
+
+ if (st && st->ss->default_geometry) /* st may be NULL and the handler hook is optional */
+ st->ss->default_geometry(st, &level, &layout, NULL); /* handler is asked first; level and layout are passed by address, so it may change either */
+
+ if (layout == UnSet) /* no handler, or it left layout alone: use the per-level defaults */
+ switch(level) {
+ default: /* no layout */
+ layout = 0;
+ break;
+ case 10:
+ layout = 0x102; /* near=2, far=1 */
+ if (verbose > 0) /* the "defaults to" notices go to stderr only when verbose > 0 */
+ fprintf(stderr,
+ Name ": layout defaults to n2\n");
+ break;
+ case 5:
+ case 6: /* levels 5 and 6 share the "default" entry of the r5layout table */
+ layout = map_name(r5layout, "default");
+ if (verbose > 0)
+ fprintf(stderr,
+ Name ": layout defaults to %s\n", map_num(r5layout, layout)); /* presumably map_num() turns the value back into its table name */
+ break;
+ case LEVEL_FAULTY: /* uses the "default" entry of the faultylayout table */
+ layout = map_name(faultylayout, "default");
+
+ if (verbose > 0)
+ fprintf(stderr,
+ Name ": layout defaults to %s\n", map_num(faultylayout, layout));
+ break;
+ }
+
+ return layout;
+}
+
+
int Create(struct supertype *st, char *mddev,
- int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
+ int chunk, int level, int layout, unsigned long long size,
+ int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
- int subdevs, mddev_dev_t devlist,
+ int subdevs, struct mddev_dev *devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof)
+ char *bitmap_file, int bitmap_chunk, int write_behind,
+ int delay, int autof)
{
/*
* Create a new raid array.
char *mindisc = NULL;
char *maxdisc = NULL;
int dnum;
- mddev_dev_t dv;
+ struct mddev_dev *dv;
int fail=0, warn=0;
struct stat stb;
int first_missing = subdevs * 2;
unsigned long long bitmapsize;
struct mdinfo info, *infos;
int did_default = 0;
+ int do_default_layout = 0;
+ int do_default_chunk = 0;
unsigned long safe_mode_delay = 0;
char chosen_name[1024];
struct map_ent *map = NULL;
+ unsigned long long newsize;
int major_num = BITMAP_MAJOR_HI;
memset(&info, 0, sizeof(info));
-
- if (level == UnSet) {
- /* "ddf" and "imsm" metadata only supports one level - should possibly
- * push this into metadata handler??
- */
- if (st && (st->ss == &super_ddf || st->ss == &super_imsm))
- level = LEVEL_CONTAINER;
- }
-
+ if (level == UnSet && st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, NULL, NULL);
if (level == UnSet) {
fprintf(stderr,
Name ": a RAID level is needed to create an array.\n");
inf.raid_disks == 0) {
/* yep, looks like a container */
if (st) {
- rv = st->ss->load_super(st, fd,
- devlist->devname);
+ rv = st->ss->load_container(st, fd,
+ devlist->devname);
if (rv == 0)
have_container = 1;
} else {
- st = guess_super(fd);
+ st = super_by_fd(fd, NULL);
if (st && !(rv = st->ss->
- load_super(st, fd,
- devlist->devname)))
+ load_container(st, fd,
+ devlist->devname)))
have_container = 1;
else
st = NULL;
}
+ if (have_container) {
+ subdevs = raiddisks;
+ first_missing = subdevs * 2;
+ second_missing = subdevs * 2;
+ insert_point = subdevs * 2;
+ }
}
if (fd >= 0)
close(fd);
- if (have_container) {
- subdevs = 0;
- devlist = NULL;
- }
}
if (st && st->ss->external && sparedisks) {
fprintf(stderr,
Name ": This metadata type does not support "
- "spare disks are create time\n");
+ "spare disks at create time\n");
return 1;
}
if (subdevs > raiddisks+sparedisks) {
}
/* now set some defaults */
- if (layout == UnSet)
- switch(level) {
- default: /* no layout */
- layout = 0;
- break;
- case 10:
- layout = 0x102; /* near=2, far=1 */
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to n1\n");
- break;
- case 5:
- case 6:
- layout = map_name(r5layout, "default");
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to %s\n", map_num(r5layout, layout));
- break;
- case LEVEL_FAULTY:
- layout = map_name(faultylayout, "default");
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to %s\n", map_num(faultylayout, layout));
- break;
- }
+
+ if (layout == UnSet) {
+ do_default_layout = 1;
+ layout = default_layout(st, level, verbose);
+ }
if (level == 10)
/* check layout fits in array*/
case 10:
case 6:
case 0:
- case LEVEL_LINEAR: /* linear */
- if (chunk == 0) {
+ if (chunk == 0 || chunk == UnSet) {
+ chunk = UnSet;
+ do_default_chunk = 1;
+ /* chunk will be set later */
+ }
+ break;
+ case LEVEL_LINEAR:
+ /* a chunksize of zero is perfectly valid (and preferred) since 2.6.16 */
+ if (get_linux_version() < 2006016 && chunk == 0) {
chunk = 64;
if (verbose > 0)
fprintf(stderr, Name ": chunk size defaults to 64K\n");
fprintf(stderr, Name ": unknown level %d\n", level);
return 1;
}
-
+
+ if (size && chunk && chunk != UnSet)
+ size &= ~(unsigned long long)(chunk - 1);
+ newsize = size * 2;
if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks,
- chunk, size, NULL, NULL, verbose>=0))
+ &chunk, size*2, NULL, &newsize, verbose>=0))
return 1;
+ if (chunk) {
+ newsize &= ~(unsigned long long)(chunk*2 - 1);
+ size &= ~(unsigned long long)(chunk - 1);
+ }
+ if (size == 0) {
+ size = newsize / 2;
+ if (size && verbose > 0)
+ fprintf(stderr, Name ": setting size to %lluK\n",
+ (unsigned long long)size);
+ }
+
/* now look at the subdevs */
info.array.active_disks = 0;
info.array.working_disks = 0;
dnum = 0;
- for (dv=devlist; dv; dv=dv->next, dnum++) {
+ for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
char *dname = dv->devname;
unsigned long long freesize;
if (strcasecmp(dname, "missing")==0) {
char *name = "default";
for(i=0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
+ if (do_default_layout)
+ layout = default_layout(st, level, verbose);
if (st && !st->ss->validate_geometry
(st, level, layout, raiddisks,
- chunk, size, dname, &freesize,
- verbose > 0))
+ &chunk, size*2, dname, &freesize,
+ verbose > 0)) {
+ free(st);
st = NULL;
+ chunk = do_default_chunk ? 0 : chunk;
+ }
}
if (!st) {
st->minor_version != 90)
did_default = 1;
} else {
+ if (do_default_layout)
+ layout = default_layout(st, level, verbose);
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, size, dname,
+ &chunk, size*2, dname,
&freesize,
- verbose > 0)) {
+ verbose >= 0)) {
fprintf(stderr,
Name ": %s is not suitable for "
continue;
}
}
+ if (verbose > 0 && do_default_chunk) {
+ do_default_chunk = 0;
+ fprintf(stderr, Name ": chunk size "
+ "defaults to %dK\n", chunk);
+ }
freesize /= 2; /* convert to K */
if (chunk) {
warn |= check_ext2(fd, dname);
warn |= check_reiser(fd, dname);
warn |= check_raid(fd, dname);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1)
+ /* metadata at front */
+ warn |= check_partitions(fd, dname, 0, 0);
+ else if (level == 1 || level == LEVEL_CONTAINER
+ || (level == 0 && raiddisks == 1))
+ /* partitions could be meaningful */
+ warn |= check_partitions(fd, dname, freesize*2, size*2);
+ else
+ /* partitions cannot be meaningful */
+ warn |= check_partitions(fd, dname, 0, 0);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1 &&
+ did_default &&
+ level == 1 &&
+ (warn & 1024) == 0) {
+ warn |= 1024;
+ fprintf(stderr, Name ": Note: this array has metadata at the start and\n"
+ " may not be suitable as a boot device. If you plan to\n"
+ " store '/boot' on this device please ensure that\n"
+ " your boot-loader understands md/v1.x metadata, or use\n"
+ " --metadata=0.90\n");
+ }
close(fd);
}
}
+ if (raiddisks + sparedisks > st->max_devs) {
+ fprintf(stderr, Name ": Too many devices:"
+ " %s metadata only supports %d\n",
+ st->ss->name, st->max_devs);
+ return 1;
+ }
+ if (have_container)
+ info.array.working_disks = raiddisks;
if (fail) {
fprintf(stderr, Name ": create aborted\n");
return 1;
/* size is meaningful */
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, minsize,
+ &chunk, minsize*2,
NULL, NULL, 0)) {
fprintf(stderr, Name ": devices too large for RAID level %d\n", level);
return 1;
fprintf(stderr, Name ": size set to %lluK\n", size);
}
}
- if (level > 0 && ((maxsize-size)*100 > maxsize)) {
+ if (!have_container && level > 0 && ((maxsize-size)*100 > maxsize)) {
if (runstop != 1 || verbose >= 0)
- fprintf(stderr, Name ": largest drive (%s) exceed size (%lluK) by more than 1%%\n",
+ fprintf(stderr, Name ": largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
maxdisc, size);
warn = 1;
}
+ if (st->ss->detail_platform && st->ss->detail_platform(0, 1) != 0) {
+ if (runstop != 1 || verbose >= 0)
+ fprintf(stderr, Name ": %s unable to enumerate platform support\n"
+ " array may not be compatible with hardware/firmware\n",
+ st->ss->name);
+ warn = 1;
+ }
+
if (warn) {
if (runstop!= 1) {
if (!ask("Continue creating array? ")) {
* as missing, so that a reconstruct happens (faster than re-parity)
* FIX: Can we do this for raid6 as well?
*/
- if (assume_clean==0 && force == 0 && first_missing >= raiddisks) {
+ if (st->ss->external == 0 &&
+ assume_clean==0 && force == 0 && first_missing >= raiddisks) {
switch ( level ) {
case 4:
case 5:
mdfd = create_mddev(mddev, name, autof, LOCAL, chosen_name);
if (mdfd < 0)
return 1;
+ mddev = chosen_name;
vers = md_get_version(mdfd);
if (vers < 9000) {
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- info.resync_start = ~0ULL;
+ info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;
total_slots = info.array.nr_disks;
sysfs_init(&info, mdfd, 0);
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
if (did_default && verbose >= 0) {
if (is_subarray(info.text_version)) {
fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n");
goto abort;
}
+ if (!st->ss->add_internal_bitmap) {
+ fprintf(stderr, Name ": internal bitmaps not supported with %s metadata\n",
+ st->ss->name);
+ goto abort;
+ }
if (!st->ss->add_internal_bitmap(st, &bitmap_chunk,
delay, write_behind,
bitmapsize, 1, major_num)) {
sysfs_init(&info, mdfd, 0);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
/* member */
/* When creating a member, we need to be careful
infos = malloc(sizeof(*infos) * total_slots);
for (pass=1; pass <=2 ; pass++) {
- mddev_dev_t moved_disk = NULL; /* the disk that was moved out of the insert point */
+ struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
for (dnum=0, dv = devlist ; dv ;
dv=(dv->next)?(dv->next):moved_disk, dnum++) {
abort();
if (dnum == insert_point) {
moved_disk = dv;
+ continue;
}
- if (dnum == insert_point ||
- strcasecmp(dv->devname, "missing")==0)
+ if (strcasecmp(dv->devname, "missing")==0)
continue;
+ if (have_container)
+ moved_disk = NULL;
+ if (have_container && dnum < info.array.raid_disks - 1)
+ /* repeatedly use the container */
+ moved_disk = dv;
switch(pass) {
case 1:
if (dv->writemostly == 1)
inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
- if (st->ss->external && st->subarray[0])
- fd = open(dv->devname, O_RDWR);
- else
- fd = open(dv->devname, O_RDWR|O_EXCL);
-
- if (fd < 0) {
- fprintf(stderr, Name ": failed to open %s "
- "after earlier success - aborting\n",
- dv->devname);
+ if (have_container)
+ fd = -1;
+ else {
+ if (st->ss->external &&
+ st->container_dev != NoMdDev)
+ fd = open(dv->devname, O_RDWR);
+ else
+ fd = open(dv->devname, O_RDWR|O_EXCL);
+
+ if (fd < 0) {
+ fprintf(stderr, Name ": failed to open %s "
+ "after earlier success - aborting\n",
+ dv->devname);
+ goto abort;
+ }
+ fstat(fd, &stb);
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
+ if (fd >= 0)
+ remove_partitions(fd);
+ if (st->ss->add_to_super(st, &inf->disk,
+ fd, dv->devname)) {
+ ioctl(mdfd, STOP_ARRAY, NULL);
goto abort;
}
- fstat(fd, &stb);
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
-
- remove_partitions(fd);
- st->ss->add_to_super(st, &inf->disk,
- fd, dv->devname);
- st->ss->getinfo_super(st, inf);
+ st->ss->getinfo_super(st, inf, NULL);
safe_mode_delay = inf->safe_mode_delay;
- /* getinfo_super might have lost these ... */
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ if (have_container && verbose > 0)
+ fprintf(stderr, Name ": Using %s for device %d\n",
+ map_dev(inf->disk.major,
+ inf->disk.minor,
+ 0), dnum);
+
+ if (!have_container) {
+ /* getinfo_super might have lost these ... */
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
break;
case 2:
inf->errors = 0;
Name ": ADD_NEW_DISK for %s "
"failed: %s\n",
dv->devname, strerror(errno));
- st->ss->free_super(st);
goto abort;
}
break;
}
- if (dv == moved_disk && dnum != insert_point) break;
+ if (!have_container &&
+ dv == moved_disk && dnum != insert_point) break;
}
if (pass == 1) {
+ struct mdinfo info_new;
+ struct map_ent *me = NULL;
+
+ /* check to see if the uuid has changed due to these
+ * metadata changes, and if so update the member array
+ * and container uuid. Note ->write_init_super clears
+ * the subarray cursor such that ->getinfo_super once
+ * again returns container info.
+ */
+ map_lock(&map);
+ st->ss->getinfo_super(st, &info_new, NULL);
+ if (st->ss->external && level != LEVEL_CONTAINER &&
+ !same_uuid(info_new.uuid, info.uuid, 0)) {
+ map_update(&map, fd2devnum(mdfd),
+ info_new.text_version,
+ info_new.uuid, chosen_name);
+ me = map_by_devnum(&map, st->container_dev);
+ }
+
st->ss->write_init_super(st);
+
+ /* update parent container uuid */
+ if (me) {
+ char *path = strdup(me->path);
+
+ st->ss->getinfo_super(st, &info_new, NULL);
+ map_update(&map, st->container_dev,
+ info_new.text_version,
+ info_new.uuid, path);
+ free(path);
+ }
+ map_unlock(&map);
+
flush_metadata_updates(st);
+ st->ss->free_super(st);
}
}
free(infos);
- st->ss->free_super(st);
if (level == LEVEL_CONTAINER) {
/* No need to start. But we should signal udev to
* create links */
sysfs_uevent(&info, "change");
- ;
+ if (verbose >= 0)
+ fprintf(stderr, Name ": container %s prepared.\n", mddev);
+ wait_for(chosen_name, mdfd);
} else if (runstop == 1 || subdevs >= raiddisks) {
if (st->ss->external) {
+ int err;
switch(level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
case 0:
- sysfs_set_str(&info, NULL, "array_state",
- "active");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "active");
need_mdmon = 0;
break;
default:
- sysfs_set_str(&info, NULL, "array_state",
- "readonly");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "readonly");
break;
}
sysfs_set_safemode(&info, safe_mode_delay);
+ if (err) {
+ fprintf(stderr, Name ": failed to"
+ " activate array.\n");
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
} else {
/* param is not actually used */
mdu_param_t param;
if (ioctl(mdfd, RUN_ARRAY, ¶m)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno));
- Manage_runstop(mddev, mdfd, -1, 0);
+ ioctl(mdfd, STOP_ARRAY, NULL);
goto abort;
}
}
if (verbose >= 0)
fprintf(stderr, Name ": array %s started.\n", mddev);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
if (need_mdmon)
start_mdmon(st->container_dev);
- ping_monitor(devnum2devname(st->container_dev));
+ ping_monitor_by_id(st->container_dev);
close(container_fd);
}
+ wait_for(chosen_name, mdfd);
} else {
fprintf(stderr, Name ": not starting array - not enough devices.\n");
}
return 0;
abort:
+ map_lock(&map);
+ map_remove(&map, fd2devnum(mdfd));
+ map_unlock(&map);
+
if (mdfd >= 0)
close(mdfd);
return 1;