/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#include "mdadm.h"
#include "md_u.h"
#include "md_p.h"
+#include <ctype.h>
#define REGISTER_DEV _IO (MD_MAJOR, 1)
#define START_MD _IO (MD_MAJOR, 2)
*
*/
mdu_array_info_t array;
+#ifndef MDASSEMBLE
+ struct mdinfo *mdi;
+#endif
if (md_get_version(fd) < 9000) {
fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
return 1;
}
+#ifndef MDASSEMBLE
+ /* If this is an externally-managed array, we need to modify the
+ * metadata_version so that mdmon doesn't undo our change.
+ */
+ mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
+ char vers[64];
+ strcpy(vers, "external:");
+ strcat(vers, mdi->text_version);
+ if (readonly > 0) {
+ int rv;
+ /* We set readonly ourselves. */
+ vers[9] = '-';
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ close(fd);
+ rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
+
+ if (rv < 0) {
+ fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ devname, strerror(errno));
+
+ vers[9] = mdi->text_version[0];
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+ return 1;
+ }
+ } else {
+ char *cp;
+ /* We cannot set read/write - must signal mdmon */
+ vers[9] = '/';
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ cp = strchr(vers+10, '/');
+ if (*cp)
+ *cp = 0;
+ ping_monitor(vers+10);
+ }
+ return 0;
+ }
+#endif
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": %s does not appear to be active.\n",
devname);
#ifndef MDASSEMBLE
+static void remove_devices(int devnum, char *path)
+{
+	/* Remove all 'standard' devices for 'devnum', including
+	 * partitions.  Also remove names at 'path' - possibly with
+	 * partition suffixes - which link to those names.
+	 *
+	 * devnum >= 0 selects /dev/mdN, a negative devnum selects
+	 * /dev/md_dN with N == -1-devnum.  'path' may be NULL, in
+	 * which case only the standard names are removed.
+	 * Failures of unlink()/readlink() are deliberately ignored:
+	 * this is best-effort cleanup.
+	 */
+	char base[40];
+	char *path2 = NULL;
+	char link[1024];
+	int n;
+	int part;
+	char *be;
+	char *pe = NULL;
+
+	if (devnum >= 0)
+		sprintf(base, "/dev/md%d", devnum);
+	else
+		sprintf(base, "/dev/md_d%d", -1-devnum);
+	be = base + strlen(base);
+
+	if (path) {
+		/* 20 spare bytes leave room for the "pNN" suffix */
+		path2 = malloc(strlen(path)+20);
+		if (path2) {
+			strcpy(path2, path);
+			pe = path2 + strlen(path2);
+		}
+		/* on allocation failure we still remove the standard
+		 * names, we just cannot chase the links at 'path'.
+		 */
+	}
+
+	for (part = 0; part < 16; part++) {
+		if (part) {
+			sprintf(be, "p%d", part);
+			if (path2) {
+				/* A name ending in a digit needs a 'p'
+				 * separator before the partition number.
+				 * Guard against an empty 'path' so we never
+				 * look in front of the buffer.
+				 */
+				if (pe > path2 &&
+				    isdigit((unsigned char)pe[-1]))
+					sprintf(pe, "p%d", part);
+				else
+					sprintf(pe, "%d", part);
+			}
+		}
+		/* FIXME test if really is md device ?? */
+		unlink(base);
+		if (path2) {
+			/* readlink() does not NUL-terminate and returns
+			 * -1 on error, so compare by explicit length.
+			 */
+			n = readlink(path2, link, sizeof(link));
+			if (n > 0 && (int)strlen(base) == n &&
+			    strncmp(link, base, n) == 0)
+				unlink(path2);
+		}
+	}
+	free(path2);
+}
+
+
int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
/* Run or stop the array. array must already be configured
* required >= 0.90.0
+ * Only print failure messages if quiet == 0;
+ * quiet > 0 means really be quiet
+ * quiet < 0 means we will try again if it fails.
*/
mdu_param_t param; /* unused */
if (runstop == -1 && md_get_version(fd) < 9000) {
if (ioctl(fd, STOP_MD, 0)) {
- if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n",
- devname, strerror(errno));
+ if (quiet == 0) fprintf(stderr,
+ Name ": stopping device %s "
+ "failed: %s\n",
+ devname, strerror(errno));
return 1;
}
}
} else if (runstop < 0){
struct map_ent *map = NULL;
struct stat stb;
- if (ioctl(fd, STOP_ARRAY, NULL)) {
- if (quiet==0)
- fprintf(stderr, Name ": fail to stop array %s: %s\n",
+ struct mdinfo *mdi;
+ int devnum;
+ /* If this is an mdmon managed array, just write 'inactive'
+ * to the array state and let mdmon clear up.
+ */
+ devnum = fd2devnum(fd);
+ mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ if (mdi &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
+ /* This is mdmon managed. */
+ close(fd);
+ if (sysfs_set_str(mdi, NULL,
+ "array_state", "inactive") < 0) {
+ if (quiet == 0)
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+
+ /* Give monitor a chance to act */
+ ping_monitor(mdi->text_version);
+
+ fd = open(devname, O_RDONLY);
+ } else if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.minor_version == -2 &&
+ !is_subarray(mdi->text_version)) {
+ /* container, possibly mdmon-managed.
+ * Make sure mdmon isn't opening it, which
+ * would interfere with the 'stop'
+ */
+ ping_monitor(mdi->sys_name);
+ }
+
+ if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
+ if (quiet == 0) {
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
devname, strerror(errno));
+ if (errno == EBUSY)
+ fprintf(stderr, "Perhaps a running "
+ "process, mounted filesystem "
+ "or active volume group?\n");
+ }
+ if (mdi)
+ sysfs_free(mdi);
return 1;
}
+ /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+ * was stopped, so we'll do it here just to be sure. Drop any
+ * partitions as well...
+ */
+ if (fd >= 0)
+ ioctl(fd, BLKRRPART, 0);
+ if (mdi)
+ sysfs_uevent(mdi, "change");
+
+
+ if (devnum != NoMdDev &&
+ (stat("/dev/.udev", &stb) != 0 ||
+ check_env("MDADM_NO_UDEV"))) {
+ struct map_ent *mp = map_by_devnum(&map, devnum);
+ remove_devices(devnum, mp ? mp->path : NULL);
+ }
+
+
if (quiet <= 0)
fprintf(stderr, Name ": stopped %s\n", devname);
- if (fstat(fd, &stb) == 0) {
- int devnum;
- if (major(stb.st_rdev) == MD_MAJOR)
- devnum = minor(stb.st_rdev);
- else
- devnum = -1-(minor(stb.st_rdev)>>6);
+ if (devnum != NoMdDev) {
map_delete(&map, devnum);
map_write(map);
map_free(map);
} else {
j = 0;
- if (stat(dv->devname, &stb)) {
+ tfd = dev_open(dv->devname, O_RDONLY);
+ if (tfd < 0 || fstat(tfd, &stb) != 0) {
fprintf(stderr, Name ": cannot find %s: %s\n",
dv->devname, strerror(errno));
+ if (tfd >= 0)
+ close(tfd);
return 1;
}
+ close(tfd);
if ((stb.st_mode & S_IFMT) != S_IFBLK) {
fprintf(stderr, Name ": %s is not a "
"block device.\n",
return 1;
}
/* Make sure it isn't in use (in 2.6 or later) */
- tfd = open(dv->devname, O_RDONLY|O_EXCL);
+ tfd = dev_open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
if (tfd < 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dv->devname, strerror(errno));
return 1;
}
- if (array.not_persistent == 0) {
-
- /* Make sure device is large enough */
- if (tst->ss->avail_size(tst, ldsize/512) <
- array_size) {
- fprintf(stderr, Name ": %s not large enough to join array\n",
- dv->devname);
- return 1;
- }
+ if (array.not_persistent == 0 || tst->ss->external) {
/* need to find a sample superblock to copy, and
- * a spare slot to use
+ * a spare slot to use.
+ * For an 'external' array (well, container based),
+ * we can just load the metadata for the array.
*/
- for (j = 0; j < tst->max_devs; j++) {
+ if (tst->ss->external) {
+ tst->ss->load_super(tst, fd, NULL);
+ } else for (j = 0; j < tst->max_devs; j++) {
char *dev;
int dfd;
disc.number = j;
close(dfd);
break;
}
+ /* FIXME this is a bad test to be using */
if (!tst->sb) {
fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
return 1;
}
+
+ /* Make sure device is large enough */
+ if (tst->ss->avail_size(tst, ldsize/512) <
+ array_size) {
+ fprintf(stderr, Name ": %s not large enough to join array\n",
+ dv->devname);
+ return 1;
+ }
+
/* Possibly this device was recently part of the array
* and was temporarily removed, and is now being re-added.
* If so, we can simply re-add it.
disc.number = mdi.disk.number;
disc.raid_disk = mdi.disk.raid_disk;
disc.state = mdi.disk.state;
- if (dv->writemostly)
+ if (dv->writemostly == 1)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ if (dv->writemostly == 2)
+ disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
if (verbose >= 0)
fprintf(stderr, Name ": re-added %s\n", dv->devname);
continue;
}
+ if (errno == ENOMEM || errno == EROFS) {
+ fprintf(stderr, Name ": add new device failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ return 1;
+ }
/* fall back on normal-add */
}
}
disc.minor = minor(stb.st_rdev);
disc.number =j;
disc.state = 0;
- if (array.not_persistent==0) {
+ if (array.not_persistent==0 || tst->ss->external) {
int dfd;
- if (dv->writemostly)
+ if (dv->writemostly == 1)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- dfd = open(dv->devname, O_RDWR | O_EXCL);
- tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname);
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname)) {
+ close(dfd);
+ return 1;
+ }
/* write_init_super will close 'dfd' */
- if (tst->ss->write_init_super(tst))
+ if (tst->ss->external)
+ /* mdmon will write the metadata */
+ close(dfd);
+ else if (tst->ss->write_init_super(tst))
return 1;
} else if (dv->re_add) {
/* this had better be raid1.
break;
}
}
- if (dv->writemostly)
+ if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
- if (ioctl(fd,ADD_NEW_DISK, &disc)) {
+ if (tst->ss->external) {
+ /* add a disk to an external metadata container
+ * only if mdmon is around to see it
+ */
+ struct mdinfo new_mdi;
+ struct mdinfo *sra;
+ int container_fd;
+ int devnum = fd2devnum(fd);
+
+ container_fd = open_dev_excl(devnum);
+ if (container_fd < 0) {
+ fprintf(stderr, Name ": add failed for %s:"
+ " could not get exclusive access to container\n",
+ dv->devname);
+ return 1;
+ }
+
+ if (!mdmon_running(devnum)) {
+ fprintf(stderr, Name ": add failed for %s: mdmon not running\n",
+ dv->devname);
+ close(container_fd);
+ return 1;
+ }
+
+ sra = sysfs_read(container_fd, -1, 0);
+ if (!sra) {
+ fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
+ dv->devname);
+ close(container_fd);
+ return 1;
+ }
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ tst->ss->getinfo_super(tst, &new_mdi);
+ new_mdi.disk.major = disc.major;
+ new_mdi.disk.minor = disc.minor;
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ fprintf(stderr, Name ": add new device to external metadata"
+ " failed for %s\n", dv->devname);
+ close(container_fd);
+ return 1;
+ }
+ ping_monitor(devnum2devname(devnum));
+ sysfs_free(sra);
+ close(container_fd);
+ } else if (ioctl(fd, ADD_NEW_DISK, &disc)) {
fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
dv->devname, j, strerror(errno));
return 1;
" to container - odd\n");
return 1;
}
- if (!sysfs_unique_holder(dnum, stb.st_rdev)) {
+ /* in the detached case it is not possible to
+ * check if we are the unique holder, so just
+ * rely on the 'detached' checks
+ */
+ if (strcmp(dv->devname, "detached") == 0 ||
+ sysfs_unique_holder(dnum, stb.st_rdev))
+ /* pass */;
+ else {
fprintf(stderr, Name
": %s is %s, cannot remove.\n",
dnprintable,
close(lfd);
return 1;
}
+ if (tst->ss->external) {
+ /*
+ * Before dropping our exclusive open we make an
+ * attempt at preventing mdmon from seeing an
+ * 'add' event before reconciling this 'remove'
+ * event.
+ */
+ char *name = devnum2devname(fd2devnum(fd));
+
+ if (!name) {
+ fprintf(stderr, Name ": unable to get container name\n");
+ return 1;
+ }
+
+ ping_manager(name);
+ free(name);
+ }
close(lfd);
if (verbose >= 0)
fprintf(stderr, Name ": hot removed %s\n",