/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#include "mdadm.h"
{
int layout = UnSet;
- if (st && st->ss->default_layout)
- layout = st->ss->default_layout(level);
+ if (st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, &layout, NULL);
if (layout == UnSet)
switch(level) {
layout = 0x102; /* near=2, far=1 */
if (verbose > 0)
fprintf(stderr,
- Name ": layout defaults to n1\n");
+ Name ": layout defaults to n2\n");
break;
case 5:
case 6:
int Create(struct supertype *st, char *mddev,
- int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
+ int chunk, int level, int layout, unsigned long long size,
+ int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
- int subdevs, mddev_dev_t devlist,
+ int subdevs, struct mddev_dev *devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof)
+ char *bitmap_file, int bitmap_chunk, int write_behind,
+ int delay, int autof)
{
/*
* Create a new raid array.
char *mindisc = NULL;
char *maxdisc = NULL;
int dnum;
- mddev_dev_t dv;
+ struct mddev_dev *dv;
int fail=0, warn=0;
struct stat stb;
int first_missing = subdevs * 2;
struct mdinfo info, *infos;
int did_default = 0;
int do_default_layout = 0;
+ int do_default_chunk = 0;
unsigned long safe_mode_delay = 0;
char chosen_name[1024];
struct map_ent *map = NULL;
int major_num = BITMAP_MAJOR_HI;
memset(&info, 0, sizeof(info));
-
- if (level == UnSet) {
- /* "ddf" and "imsm" metadata only supports one level - should possibly
- * push this into metadata handler??
- */
- if (st && (st->ss == &super_ddf || st->ss == &super_imsm))
- level = LEVEL_CONTAINER;
- }
-
+ if (level == UnSet && st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, NULL, NULL);
if (level == UnSet) {
fprintf(stderr,
Name ": a RAID level is needed to create an array.\n");
inf.raid_disks == 0) {
/* yep, looks like a container */
if (st) {
- rv = st->ss->load_super(st, fd,
- devlist->devname);
+ rv = st->ss->load_container(st, fd,
+ devlist->devname);
if (rv == 0)
have_container = 1;
} else {
- st = guess_super(fd);
+ st = super_by_fd(fd, NULL);
if (st && !(rv = st->ss->
- load_super(st, fd,
- devlist->devname)))
+ load_container(st, fd,
+ devlist->devname)))
have_container = 1;
else
st = NULL;
if (st && st->ss->external && sparedisks) {
fprintf(stderr,
Name ": This metadata type does not support "
- "spare disks are create time\n");
+ "spare disks at create time\n");
return 1;
}
if (subdevs > raiddisks+sparedisks) {
case 10:
case 6:
case 0:
- case LEVEL_LINEAR: /* linear */
- if (chunk == 0) {
+ if (chunk == 0 || chunk == UnSet) {
+ chunk = UnSet;
+ do_default_chunk = 1;
+ /* chunk will be set later */
+ }
+ break;
+ case LEVEL_LINEAR:
+ /* a chunksize of zero is perfectly valid (and preferred) since 2.6.16 */
+ if (get_linux_version() < 2006016 && chunk == 0) {
chunk = 64;
if (verbose > 0)
fprintf(stderr, Name ": chunk size defaults to 64K\n");
return 1;
}
+ if (size && chunk && chunk != UnSet)
+ size &= ~(unsigned long long)(chunk - 1);
newsize = size * 2;
if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks,
- chunk, size*2, NULL, &newsize, verbose>=0))
+ &chunk, size*2, NULL, &newsize, verbose>=0))
return 1;
+
+ if (chunk && chunk != UnSet) {
+ newsize &= ~(unsigned long long)(chunk*2 - 1);
+ if (do_default_chunk) {
+ /* default chunk was just set */
+ if (verbose > 0)
+ fprintf(stderr, Name ": chunk size "
+ "defaults to %dK\n", chunk);
+ size &= ~(unsigned long long)(chunk - 1);
+ do_default_chunk = 0;
+ }
+ }
+
if (size == 0) {
size = newsize / 2;
+ if (level == 1)
+ /* If this is ever reshaped to RAID5, we will
+ * need a chunksize. So round it off a bit
+ * now just to be safe
+ */
+ size &= ~(64ULL-1);
+
if (size && verbose > 0)
fprintf(stderr, Name ": setting size to %lluK\n",
(unsigned long long)size);
for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
char *dname = dv->devname;
unsigned long long freesize;
+ int dfd;
+
if (strcasecmp(dname, "missing")==0) {
if (first_missing > dnum)
first_missing = dnum;
missing_disks ++;
continue;
}
+ dfd = open(dname, O_RDONLY);
+ if (dfd < 0) {
+ fprintf(stderr, Name ": cannot open %s: %s\n",
+ dname, strerror(errno));
+ exit(2);
+ }
+ if (fstat(dfd, &stb) != 0 ||
+ (stb.st_mode & S_IFMT) != S_IFBLK) {
+ close(dfd);
+ fprintf(stderr, Name ": %s is not a block device\n",
+ dname);
+ exit(2);
+ }
+ close(dfd);
info.array.working_disks++;
if (dnum < raiddisks)
info.array.active_disks++;
char *name = "default";
for(i=0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
+ if (!st)
+ continue;
if (do_default_layout)
layout = default_layout(st, level, verbose);
- if (st && !st->ss->validate_geometry
- (st, level, layout, raiddisks,
- chunk, size*2, dname, &freesize,
- verbose > 0))
+ switch (st->ss->validate_geometry(
+ st, level, layout, raiddisks,
+ &chunk, size*2, dname, &freesize,
+ verbose > 0)) {
+ case -1: /* Not valid, message printed, and not
+ * worth checking any further */
+ exit(2);
+ break;
+ case 0: /* Geometry not valid */
+ free(st);
st = NULL;
+ chunk = do_default_chunk ? UnSet : chunk;
+ break;
+ case 1: /* All happy */
+ break;
+ }
}
if (!st) {
+ int dfd = open(dname, O_RDONLY|O_EXCL);
+ if (dfd < 0) {
+ fprintf(stderr, Name ": cannot open %s: %s\n",
+ dname, strerror(errno));
+ exit(2);
+ }
fprintf(stderr, Name ": device %s not suitable "
"for any style of array\n",
dname);
did_default = 1;
} else {
if (do_default_layout)
- layout = default_layout(st, level, verbose);
+ layout = default_layout(st, level, 0);
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, size*2, dname,
+ &chunk, size*2, dname,
&freesize,
- verbose > 0)) {
+ verbose >= 0)) {
fprintf(stderr,
Name ": %s is not suitable for "
}
freesize /= 2; /* convert to K */
- if (chunk) {
+ if (chunk && chunk != UnSet) {
/* round to chunk size */
freesize = freesize & ~(chunk-1);
+ if (do_default_chunk) {
+ /* default chunk was just set */
+ if (verbose > 0)
+ fprintf(stderr, Name ": chunk size "
+ "defaults to %dK\n", chunk);
+ size &= ~(unsigned long long)(chunk - 1);
+ do_default_chunk = 0;
+ }
}
if (size && freesize < size) {
warn |= check_ext2(fd, dname);
warn |= check_reiser(fd, dname);
warn |= check_raid(fd, dname);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1)
+ /* metadata at front */
+ warn |= check_partitions(fd, dname, 0, 0);
+ else if (level == 1 || level == LEVEL_CONTAINER
+ || (level == 0 && raiddisks == 1))
+ /* partitions could be meaningful */
+ warn |= check_partitions(fd, dname, freesize*2, size*2);
+ else
+ /* partitions cannot be meaningful */
+ warn |= check_partitions(fd, dname, 0, 0);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1 &&
+ did_default &&
+ level == 1 &&
+ (warn & 1024) == 0) {
+ warn |= 1024;
+ fprintf(stderr, Name ": Note: this array has metadata at the start and\n"
+ " may not be suitable as a boot device. If you plan to\n"
+ " store '/boot' on this device please ensure that\n"
+ " your boot-loader understands md/v1.x metadata, or use\n"
+ " --metadata=0.90\n");
+ }
close(fd);
}
}
+ if (raiddisks + sparedisks > st->max_devs) {
+ fprintf(stderr, Name ": Too many devices:"
+ " %s metadata only supports %d\n",
+ st->ss->name, st->max_devs);
+ return 1;
+ }
if (have_container)
info.array.working_disks = raiddisks;
if (fail) {
/* size is meaningful */
if (!st->ss->validate_geometry(st, level, layout,
raiddisks,
- chunk, minsize*2,
+ &chunk, minsize*2,
NULL, NULL, 0)) {
fprintf(stderr, Name ": devices too large for RAID level %d\n", level);
return 1;
}
size = minsize;
+ if (level == 1)
+ /* If this is ever reshaped to RAID5, we will
+ * need a chunksize. So round it off a bit
+ * now just to be safe
+ */
+ size &= ~(64ULL-1);
if (verbose > 0)
fprintf(stderr, Name ": size set to %lluK\n", size);
}
/* We need to create the device */
map_lock(&map);
mdfd = create_mddev(mddev, name, autof, LOCAL, chosen_name);
- if (mdfd < 0)
+ if (mdfd < 0) {
+ map_unlock(&map);
return 1;
+ }
+ /* verify that chosen_name is not already in use;
+ * it could conflict with an existing device,
+ * e.g. a container or an array
+ */
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0
+ && map_by_name(&map, chosen_name+8) != NULL) {
+ fprintf(stderr, Name ": Array name %s is in use already.\n",
+ chosen_name);
+ close(mdfd);
+ map_unlock(&map);
+ return 1;
+ }
mddev = chosen_name;
vers = md_get_version(mdfd);
if (vers < 9000) {
fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n");
- goto abort;
+ goto abort_locked;
} else {
mdu_array_info_t inf;
memset(&inf, 0, sizeof(inf));
if (inf.working_disks != 0) {
fprintf(stderr, Name ": another array by this name"
" is already running.\n");
- goto abort;
+ goto abort_locked;
}
}
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- info.resync_start = ~0ULL;
+ info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;
}
}
if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid))
- goto abort;
+ goto abort_locked;
total_slots = info.array.nr_disks;
+ st->ss->getinfo_super(st, &info, NULL);
sysfs_init(&info, mdfd, 0);
- st->ss->getinfo_super(st, &info);
if (did_default && verbose >= 0) {
if (is_subarray(info.text_version)) {
fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n");
goto abort;
}
+ if (!st->ss->add_internal_bitmap) {
+ fprintf(stderr, Name ": internal bitmaps not supported with %s metadata\n",
+ st->ss->name);
+ goto abort;
+ }
if (!st->ss->add_internal_bitmap(st, &bitmap_chunk,
delay, write_behind,
bitmapsize, 1, major_num)) {
sysfs_init(&info, mdfd, 0);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
/* member */
/* When creating a member, we need to be careful
}
infos = malloc(sizeof(*infos) * total_slots);
+ if (!infos) {
+ fprintf(stderr, Name ": Unable to allocate memory\n");
+ goto abort;
+ }
for (pass=1; pass <=2 ; pass++) {
- mddev_dev_t moved_disk = NULL; /* the disk that was moved out of the insert point */
+ struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
for (dnum=0, dv = devlist ; dv ;
dv=(dv->next)?(dv->next):moved_disk, dnum++) {
if (have_container)
fd = -1;
else {
- if (st->ss->external && st->subarray[0])
+ if (st->ss->external &&
+ st->container_dev != NoMdDev)
fd = open(dv->devname, O_RDWR);
else
fd = open(dv->devname, O_RDWR|O_EXCL);
if (fd >= 0)
remove_partitions(fd);
if (st->ss->add_to_super(st, &inf->disk,
- fd, dv->devname))
+ fd, dv->devname)) {
+ ioctl(mdfd, STOP_ARRAY, NULL);
goto abort;
- st->ss->getinfo_super(st, inf);
+ }
+ st->ss->getinfo_super(st, inf, NULL);
safe_mode_delay = inf->safe_mode_delay;
if (have_container && verbose > 0)
break;
case 2:
inf->errors = 0;
- rv = 0;
rv = add_disk(mdfd, st, &info, inf);
Name ": ADD_NEW_DISK for %s "
"failed: %s\n",
dv->devname, strerror(errno));
- st->ss->free_super(st);
goto abort;
}
break;
dv == moved_disk && dnum != insert_point) break;
}
if (pass == 1) {
- st->ss->write_init_super(st);
+ struct mdinfo info_new;
+ struct map_ent *me = NULL;
+
+ /* check to see if the uuid has changed due to these
+ * metadata changes, and if so update the member array
+ * and container uuid. Note ->write_init_super clears
+ * the subarray cursor such that ->getinfo_super once
+ * again returns container info.
+ */
+ map_lock(&map);
+ st->ss->getinfo_super(st, &info_new, NULL);
+ if (st->ss->external && level != LEVEL_CONTAINER &&
+ !same_uuid(info_new.uuid, info.uuid, 0)) {
+ map_update(&map, fd2devnum(mdfd),
+ info_new.text_version,
+ info_new.uuid, chosen_name);
+ me = map_by_devnum(&map, st->container_dev);
+ }
+
+ if (st->ss->write_init_super(st)) {
+ st->ss->free_super(st);
+ goto abort_locked;
+ }
+
+ /* update parent container uuid */
+ if (me) {
+ char *path = strdup(me->path);
+
+ st->ss->getinfo_super(st, &info_new, NULL);
+ map_update(&map, st->container_dev,
+ info_new.text_version,
+ info_new.uuid, path);
+ free(path);
+ }
+ map_unlock(&map);
+
flush_metadata_updates(st);
+ st->ss->free_super(st);
}
}
free(infos);
- st->ss->free_super(st);
if (level == LEVEL_CONTAINER) {
/* No need to start. But we should signal udev to
sysfs_uevent(&info, "change");
if (verbose >= 0)
fprintf(stderr, Name ": container %s prepared.\n", mddev);
- wait_for(chosen_name);
+ wait_for(chosen_name, mdfd);
} else if (runstop == 1 || subdevs >= raiddisks) {
if (st->ss->external) {
+ int err;
switch(level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
case 0:
- sysfs_set_str(&info, NULL, "array_state",
- "active");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "active");
need_mdmon = 0;
break;
default:
- sysfs_set_str(&info, NULL, "array_state",
- "readonly");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "readonly");
break;
}
sysfs_set_safemode(&info, safe_mode_delay);
+ if (err) {
+ fprintf(stderr, Name ": failed to"
+ " activate array.\n");
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
} else {
/* param is not actually used */
mdu_param_t param;
if (ioctl(mdfd, RUN_ARRAY, ¶m)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno));
- Manage_runstop(mddev, mdfd, -1, 0);
+ if (info.array.chunk_size & (info.array.chunk_size-1)) {
+ fprintf(stderr, " : Problem may be that "
+ "chunk size is not a power of 2\n");
+ }
+ ioctl(mdfd, STOP_ARRAY, NULL);
goto abort;
}
}
if (verbose >= 0)
fprintf(stderr, Name ": array %s started.\n", mddev);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
if (need_mdmon)
start_mdmon(st->container_dev);
- ping_monitor(devnum2devname(st->container_dev));
+ ping_monitor_by_id(st->container_dev);
close(container_fd);
}
- wait_for(chosen_name);
+ wait_for(chosen_name, mdfd);
} else {
fprintf(stderr, Name ": not starting array - not enough devices.\n");
}
return 0;
abort:
+ map_lock(&map);
+ abort_locked:
+ map_remove(&map, fd2devnum(mdfd));
+ map_unlock(&map);
+
if (mdfd >= 0)
close(mdfd);
return 1;