/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#include "mdadm.h"
#include "md_p.h"
#include <ctype.h>
-int Create(struct supertype *st, char *mddev, int mdfd,
+/*
+ * Choose the layout to use when the caller did not specify one.
+ *
+ * If a metadata handler is supplied (st != NULL) and it provides a
+ * ->default_layout hook, that hook is asked first.  If the result is
+ * still UnSet, a built-in per-level default is used instead:
+ *   - level 10:          0x102 (near=2, far=1), i.e. "n2"
+ *   - levels 5 and 6:    the r5layout entry named "default"
+ *   - LEVEL_FAULTY:      the faultylayout entry named "default"
+ *   - any other level:   0 (the level has no layout)
+ *
+ * When verbose > 0 the built-in choice for levels 10, 5, 6 and
+ * LEVEL_FAULTY is reported on stderr.  Nothing is printed here when
+ * the metadata handler supplied the layout.
+ *
+ * Returns the chosen layout value.
+ */
+static int default_layout(struct supertype *st, int level, int verbose)
+{
+	int layout = UnSet;
+
+	/* Give the metadata handler first say, if it has an opinion. */
+	if (st && st->ss->default_layout)
+		layout = st->ss->default_layout(level);
+
+	if (layout == UnSet) {
+		switch(level) {
+		default: /* no layout */
+			layout = 0;
+			break;
+		case 10:
+			layout = 0x102; /* near=2, far=1 */
+			if (verbose > 0)
+				/* two near copies is the "n2" layout */
+				fprintf(stderr,
+					Name ": layout defaults to n2\n");
+			break;
+		case 5:
+		case 6:
+			layout = map_name(r5layout, "default");
+			if (verbose > 0)
+				fprintf(stderr,
+					Name ": layout defaults to %s\n", map_num(r5layout, layout));
+			break;
+		case LEVEL_FAULTY:
+			layout = map_name(faultylayout, "default");
+
+			if (verbose > 0)
+				fprintf(stderr,
+					Name ": layout defaults to %s\n", map_num(faultylayout, layout));
+			break;
+		}
+	}
+
+	return layout;
+}
+
+
+int Create(struct supertype *st, char *mddev,
int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay)
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof)
{
/*
* Create a new raid array.
* if runstop==run, or raiddisks disks were used,
* RUN_ARRAY
*/
+ int mdfd;
unsigned long long minsize=0, maxsize=0;
char *mindisc = NULL;
char *maxdisc = NULL;
int rv;
int bitmap_fd;
int have_container = 0;
- int container_fd;
+ int container_fd = -1;
int need_mdmon = 0;
unsigned long long bitmapsize;
- struct mdinfo *sra;
struct mdinfo info, *infos;
int did_default = 0;
+ int do_default_layout = 0;
+ unsigned long safe_mode_delay = 0;
+ char chosen_name[1024];
+ struct map_ent *map = NULL;
+ unsigned long long newsize;
int major_num = BITMAP_MAJOR_HI;
memset(&info, 0, sizeof(info));
- vers = md_get_version(mdfd);
- if (vers < 9000) {
- fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n");
- return 1;
- } else {
- mdu_array_info_t inf;
- memset(&inf, 0, sizeof(inf));
- ioctl(mdfd, GET_ARRAY_INFO, &inf);
- if (inf.working_disks != 0) {
- fprintf(stderr, Name ": another array by this name"
- " is already running.\n");
- return 1;
- }
- }
if (level == UnSet) {
/* "ddf" and "imsm" metadata only supports one level - should possibly
* push this into metadata handler??
int fd;
memset(&inf, 0, sizeof(inf));
- fd = open(devlist->devname, O_RDONLY, 0);
+ fd = open(devlist->devname, O_RDONLY);
if (fd >= 0 &&
ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
inf.raid_disks == 0) {
else
st = NULL;
}
+ if (have_container) {
+ subdevs = raiddisks;
+ first_missing = subdevs * 2;
+ second_missing = subdevs * 2;
+ insert_point = subdevs * 2;
+ }
}
if (fd >= 0)
close(fd);
- if (have_container) {
- subdevs = 0;
- devlist = NULL;
- }
+ }
+ if (st && st->ss->external && sparedisks) {
+ fprintf(stderr,
+ Name ": This metadata type does not support "
+ "spare disks at create time\n");
+ return 1;
}
if (subdevs > raiddisks+sparedisks) {
fprintf(stderr, Name ": You have listed more devices (%d) than are in the array(%d)!\n", subdevs, raiddisks+sparedisks);
}
/* now set some defaults */
- if (layout == UnSet)
- switch(level) {
- default: /* no layout */
- layout = 0;
- break;
- case 10:
- layout = 0x102; /* near=2, far=1 */
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to n1\n");
- break;
- case 5:
- case 6:
- layout = map_name(r5layout, "default");
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to %s\n", map_num(r5layout, layout));
- break;
- case LEVEL_FAULTY:
- layout = map_name(faultylayout, "default");
- if (verbose > 0)
- fprintf(stderr,
- Name ": layout defaults to %s\n", map_num(faultylayout, layout));
- break;
- }
+
+ if (layout == UnSet) {
+ do_default_layout = 1;
+ layout = default_layout(st, level, verbose);
+ }
if (level == 10)
/* check layout fits in array*/
case 10:
case 6:
case 0:
- case LEVEL_LINEAR: /* linear */
if (chunk == 0) {
+ chunk = 512;
+ if (verbose > 0)
+ fprintf(stderr, Name ": chunk size defaults to 512K\n");
+ }
+ break;
+ case LEVEL_LINEAR:
+ /* a chunksize of zero is perfectly valid (and preferred) since 2.6.16 */
+ if (get_linux_version() < 2006016 && chunk == 0) {
chunk = 64;
if (verbose > 0)
fprintf(stderr, Name ": chunk size defaults to 64K\n");
fprintf(stderr, Name ": unknown level %d\n", level);
return 1;
}
-
+
+ if (size && chunk)
+ size &= ~(unsigned long long)(chunk - 1);
+ newsize = size * 2;
if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks,
- chunk, size, NULL, NULL, verbose>=0))
+ chunk, size*2, NULL, &newsize, verbose>=0))
return 1;
+ if (size == 0) {
+ size = newsize / 2;
+ if (size && verbose > 0)
+ fprintf(stderr, Name ": setting size to %lluK\n",
+ (unsigned long long)size);
+ }
/* now look at the subdevs */
info.array.active_disks = 0;
info.array.working_disks = 0;
dnum = 0;
- for (dv=devlist; dv; dv=dv->next, dnum++) {
+ for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
char *dname = dv->devname;
unsigned long long freesize;
if (strcasecmp(dname, "missing")==0) {
char *name = "default";
for(i=0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
+ if (do_default_layout)
+ layout = default_layout(st, level, verbose);
if (st && !st->ss->validate_geometry
(st, level, layout, raiddisks,
- chunk, size, dname, &freesize,
+ chunk, size*2, dname, &freesize,
verbose > 0))
st = NULL;
}
st->minor_version != 90)
did_default = 1;
} else {
+ if (do_default_layout)
+ layout = default_layout(st, level, verbose);
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, size, dname,
+ chunk, size*2, dname,
&freesize,
- verbose > 0)) {
+ verbose >= 0)) {
fprintf(stderr,
Name ": %s is not suitable for "
}
if (size && freesize < size) {
- fprintf(stderr, Name ": %s is smaller that given size."
+ fprintf(stderr, Name ": %s is smaller than given size."
" %lluK < %lluK + metadata\n",
dname, freesize, size);
fail = 1;
minsize = freesize;
}
if (runstop != 1 || verbose >= 0) {
- int fd = open(dname, O_RDONLY, 0);
+ int fd = open(dname, O_RDONLY);
if (fd <0 ) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dname, strerror(errno));
warn |= check_ext2(fd, dname);
warn |= check_reiser(fd, dname);
warn |= check_raid(fd, dname);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1)
+ /* metadata at front */
+ warn |= check_partitions(fd, dname, 0);
+ else if (level == 1 || level == LEVEL_CONTAINER)
+ /* partitions could be meaningful */
+ warn |= check_partitions(fd, dname, freesize*2);
+ else
+ /* partitions cannot be meaningful */
+ warn |= check_partitions(fd, dname, 0);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1 &&
+ did_default &&
+ level == 1 &&
+ (warn & 1024) == 0) {
+ warn |= 1024;
+ fprintf(stderr, Name ": Note: this array has metadata at the start and\n"
+ " may not be suitable as a boot device. If you plan to\n"
+ " store '/boot' on this device please ensure that\n"
+ " your boot-loader understands md/v1.x metadata, or use\n"
+ " --metadata=0.90\n");
+ }
close(fd);
}
}
+ if (have_container)
+ info.array.working_disks = raiddisks;
if (fail) {
fprintf(stderr, Name ": create aborted\n");
return 1;
/* size is meaningful */
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, minsize,
+ chunk, minsize*2,
NULL, NULL, 0)) {
fprintf(stderr, Name ": devices too large for RAID level %d\n", level);
return 1;
fprintf(stderr, Name ": size set to %lluK\n", size);
}
}
- if (level > 0 && ((maxsize-size)*100 > maxsize)) {
+ if (!have_container && level > 0 && ((maxsize-size)*100 > maxsize)) {
if (runstop != 1 || verbose >= 0)
- fprintf(stderr, Name ": largest drive (%s) exceed size (%lluK) by more than 1%%\n",
+ fprintf(stderr, Name ": largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
maxdisc, size);
warn = 1;
}
+ if (st->ss->detail_platform && st->ss->detail_platform(0, 1) != 0) {
+ if (runstop != 1 || verbose >= 0)
+ fprintf(stderr, Name ": %s unable to enumerate platform support\n"
+ " array may not be compatible with hardware/firmware\n",
+ st->ss->name);
+ warn = 1;
+ }
+
if (warn) {
if (runstop!= 1) {
if (!ask("Continue creating array? ")) {
* as missing, so that a reconstruct happens (faster than re-parity)
* FIX: Can we do this for raid6 as well?
*/
- if (assume_clean==0 && force == 0 && first_missing >= raiddisks) {
+ if (st->ss->external == 0 &&
+ assume_clean==0 && force == 0 && first_missing >= raiddisks) {
switch ( level ) {
case 4:
case 5:
* into a spare, else the create will fail
*/
if (assume_clean == 0 && force == 0 && first_missing < raiddisks &&
+ st->ss->external == 0 &&
second_missing >= raiddisks && level == 6) {
insert_point = raiddisks - 1;
if (insert_point == first_missing)
return 1;
}
+ /* We need to create the device */
+ map_lock(&map);
+ mdfd = create_mddev(mddev, name, autof, LOCAL, chosen_name);
+ if (mdfd < 0)
+ return 1;
+ mddev = chosen_name;
+
+ vers = md_get_version(mdfd);
+ if (vers < 9000) {
+ fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n");
+ goto abort;
+ } else {
+ mdu_array_info_t inf;
+ memset(&inf, 0, sizeof(inf));
+ ioctl(mdfd, GET_ARRAY_INFO, &inf);
+ if (inf.working_disks != 0) {
+ fprintf(stderr, Name ": another array by this name"
+ " is already running.\n");
+ goto abort;
+ }
+ }
+
/* Ok, lets try some ioctls */
info.array.level = level;
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- info.resync_start = ~0ULL;
+ info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;
* /dev/md/home -> home
* /dev/mdhome -> home
*/
+ /* FIXME compare this with rules in create_mddev */
name = strrchr(mddev, '/');
if (name) {
name++;
}
}
if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid))
- return 1;
+ goto abort;
total_slots = info.array.nr_disks;
+ sysfs_init(&info, mdfd, 0);
st->ss->getinfo_super(st, &info);
if (did_default && verbose >= 0) {
- if (info.text_version[0] == '/') {
+ if (is_subarray(info.text_version)) {
int dnum = devname2devnum(info.text_version+1);
char *path;
int mdp = get_mdp_major();
" %s metadata\n", info.text_version);
}
+ map_update(&map, fd2devnum(mdfd), info.text_version,
+ info.uuid, chosen_name);
+ map_unlock(&map);
+
if (bitmap_file && vers < 9003) {
major_num = BITMAP_MAJOR_HOSTENDIAN;
#ifdef __BIG_ENDIAN
if (bitmap_file && strcmp(bitmap_file, "internal")==0) {
if ((vers%100) < 2) {
fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n");
- return 1;
+ goto abort;
+ }
+ if (!st->ss->add_internal_bitmap) {
+ fprintf(stderr, Name ": internal bitmaps not supported with %s metadata\n",
+ st->ss->name);
+ goto abort;
}
if (!st->ss->add_internal_bitmap(st, &bitmap_chunk,
delay, write_behind,
bitmapsize, 1, major_num)) {
fprintf(stderr, Name ": Given bitmap chunk size not supported.\n");
- return 1;
+ goto abort;
}
bitmap_file = NULL;
}
- sra = sysfs_read(mdfd, 0, 0);
+ sysfs_init(&info, mdfd, 0);
- if (st->ss->external) {
- char ver[100];
- strcat(strcpy(ver, "external:"),
- info.text_version);
- if (st->ss->external && st->subarray[0]) {
- /* member */
-
- /* When creating a member, we need to be careful
- * to negotiate with mdmon properly.
- * If it is already running, we cannot write to
- * the devices and must ask it to do that part.
- * If it isn't running, we write to the devices,
- * and then start it.
- * We hold an exclusive open on the container
- * device to make sure mdmon doesn't exit after
- * we checked that it is running.
- *
- * For now, fail if it is already running.
- */
- container_fd = open_dev_excl(st->container_dev);
- if (container_fd < 0) {
- fprintf(stderr, Name ": Cannot get exclusive "
- "open on container - weird.\n");
- return 1;
- }
- if (mdmon_running(st->container_dev)) {
- if (verbose)
- fprintf(stderr, Name ": reusing mdmon "
- "for %s.\n",
- devnum2devname(st->container_dev));
- st->update_tail = &st->updates;
- } else
- need_mdmon = 1;
- }
- if ((vers % 100) < 2 ||
- sra == NULL ||
- sysfs_set_str(sra, NULL, "metadata_version",
- ver) < 0) {
- fprintf(stderr, Name ": This kernel does not "
- "support external metadata.\n");
- return 1;
+ if (st->ss->external && st->subarray[0]) {
+ /* member */
+
+ /* When creating a member, we need to be careful
+ * to negotiate with mdmon properly.
+ * If it is already running, we cannot write to
+ * the devices and must ask it to do that part.
+ * If it isn't running, we write to the devices,
+ * and then start it.
+ * We hold an exclusive open on the container
+ * device to make sure mdmon doesn't exit after
+ * we checked that it is running.
+ *
+ * For now, fail if it is already running.
+ */
+ container_fd = open_dev_excl(st->container_dev);
+ if (container_fd < 0) {
+ fprintf(stderr, Name ": Cannot get exclusive "
+ "open on container - weird.\n");
+ goto abort;
}
- rv = sysfs_set_array(sra, &info);
- } else if ((vers % 100) >= 1) { /* can use different versions */
- mdu_array_info_t inf;
- memset(&inf, 0, sizeof(inf));
- inf.major_version = info.array.major_version;
- inf.minor_version = info.array.minor_version;
- rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
- } else
- rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
+ if (mdmon_running(st->container_dev)) {
+ if (verbose)
+ fprintf(stderr, Name ": reusing mdmon "
+ "for %s.\n",
+ devnum2devname(st->container_dev));
+ st->update_tail = &st->updates;
+ } else
+ need_mdmon = 1;
+ }
+ rv = set_array_info(mdfd, st, &info);
if (rv) {
- fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
+ fprintf(stderr, Name ": failed to set array info for %s: %s\n",
mddev, strerror(errno));
- return 1;
+ goto abort;
}
if (bitmap_file) {
delay, write_behind,
bitmapsize,
major_num)) {
- return 1;
+ goto abort;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": weird: %s cannot be openned\n",
bitmap_file);
- return 1;
+ goto abort;
}
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
- return 1;
+ goto abort;
}
}
abort();
if (dnum == insert_point) {
moved_disk = dv;
+ continue;
}
- if (dnum == insert_point ||
- strcasecmp(dv->devname, "missing")==0)
+ if (strcasecmp(dv->devname, "missing")==0)
continue;
+ if (have_container)
+ moved_disk = NULL;
+ if (have_container && dnum < info.array.raid_disks - 1)
+ /* repeatedly use the container */
+ moved_disk = dv;
switch(pass) {
case 1:
else
inf->disk.state = 0;
- if (dv->writemostly)
+ if (dv->writemostly == 1)
inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
- if (st->ss->external && st->subarray[0])
- fd = open(dv->devname, O_RDWR, 0);
- else
- fd = open(dv->devname, O_RDWR|O_EXCL,0);
-
- if (fd < 0) {
- fprintf(stderr, Name ": failed to open %s "
- "after earlier success - aborting\n",
- dv->devname);
- return 1;
+ if (have_container)
+ fd = -1;
+ else {
+ if (st->ss->external && st->subarray[0])
+ fd = open(dv->devname, O_RDWR);
+ else
+ fd = open(dv->devname, O_RDWR|O_EXCL);
+
+ if (fd < 0) {
+ fprintf(stderr, Name ": failed to open %s "
+ "after earlier success - aborting\n",
+ dv->devname);
+ goto abort;
+ }
+ fstat(fd, &stb);
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
+ if (fd >= 0)
+ remove_partitions(fd);
+ if (st->ss->add_to_super(st, &inf->disk,
+ fd, dv->devname)) {
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
}
- fstat(fd, &stb);
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
-
- remove_partitions(fd);
- st->ss->add_to_super(st, &inf->disk,
- fd, dv->devname);
st->ss->getinfo_super(st, inf);
-
- /* getinfo_super might have lost these ... */
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ safe_mode_delay = inf->safe_mode_delay;
+
+ if (have_container && verbose > 0)
+ fprintf(stderr, Name ": Using %s for device %d\n",
+ map_dev(inf->disk.major,
+ inf->disk.minor,
+ 0), dnum);
+
+ if (!have_container) {
+ /* getinfo_super might have lost these ... */
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
break;
case 2:
inf->errors = 0;
rv = 0;
- if (st->ss->external)
- rv = sysfs_add_disk(sra, inf);
- else
- rv = ioctl(mdfd, ADD_NEW_DISK,
- &inf->disk);
+ rv = add_disk(mdfd, st, &info, inf);
if (rv) {
fprintf(stderr,
"failed: %s\n",
dv->devname, strerror(errno));
st->ss->free_super(st);
- return 1;
+ goto abort;
}
break;
}
- if (dv == moved_disk && dnum != insert_point) break;
+ if (!have_container &&
+ dv == moved_disk && dnum != insert_point) break;
}
if (pass == 1) {
+ struct mdinfo info_new;
+ struct map_ent *me = NULL;
+
+ /* check to see if the uuid has changed due to these
+ * metadata changes, and if so update the member array
+ * and container uuid. Note ->write_init_super clears
+ * the subarray cursor such that ->getinfo_super once
+ * again returns container info.
+ */
+ map_lock(&map);
+ st->ss->getinfo_super(st, &info_new);
+ if (st->ss->external && level != LEVEL_CONTAINER &&
+ !same_uuid(info_new.uuid, info.uuid, 0)) {
+ map_update(&map, fd2devnum(mdfd),
+ info_new.text_version,
+ info_new.uuid, chosen_name);
+ me = map_by_devnum(&map, st->container_dev);
+ }
+
st->ss->write_init_super(st);
+
+ /* update parent container uuid */
+ if (me) {
+ char *path = strdup(me->path);
+
+ st->ss->getinfo_super(st, &info_new);
+ map_update(&map, st->container_dev,
+ info_new.text_version,
+ info_new.uuid, path);
+ free(path);
+ }
+ map_unlock(&map);
+
flush_metadata_updates(st);
}
}
free(infos);
st->ss->free_super(st);
- /* param is not actually used */
- if (level == LEVEL_CONTAINER)
- /* No need to start */
- ;
- else if (runstop == 1 || subdevs >= raiddisks) {
+ if (level == LEVEL_CONTAINER) {
+ /* No need to start. But we should signal udev to
+ * create links */
+ sysfs_uevent(&info, "change");
+ if (verbose >= 0)
+ fprintf(stderr, Name ": container %s prepared.\n", mddev);
+ wait_for(chosen_name, mdfd);
+ } else if (runstop == 1 || subdevs >= raiddisks) {
if (st->ss->external) {
switch(level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
case 0:
- sysfs_set_str(sra, NULL, "array_state",
+ sysfs_set_str(&info, NULL, "array_state",
"active");
need_mdmon = 0;
break;
default:
- sysfs_set_str(sra, NULL, "array_state",
+ sysfs_set_str(&info, NULL, "array_state",
"readonly");
break;
}
+ sysfs_set_safemode(&info, safe_mode_delay);
} else {
+ /* param is not actually used */
mdu_param_t param;
if (ioctl(mdfd, RUN_ARRAY, ¶m)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno));
- Manage_runstop(mddev, mdfd, -1, 0);
- return 1;
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
}
}
if (verbose >= 0)
ping_monitor(devnum2devname(st->container_dev));
close(container_fd);
}
+ wait_for(chosen_name, mdfd);
} else {
fprintf(stderr, Name ": not starting array - not enough devices.\n");
}
+ close(mdfd);
return 0;
+
+ abort:
+ map_lock(&map);
+ map_remove(&map, fd2devnum(mdfd));
+ map_unlock(&map);
+
+ if (mdfd >= 0)
+ close(mdfd);
+ return 1;
}