#define START_MD _IO (MD_MAJOR, 2)
#define STOP_MD _IO (MD_MAJOR, 3)
-int Build(char *mddev, int mdfd, int chunk, int level, int layout,
- int raiddisks,
- mddev_dev_t devlist, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay,
- int verbose, unsigned long long size)
+int Build(char *mddev, int chunk, int level, int layout,
+ int raiddisks, mddev_dev_t devlist, int assume_clean,
+ char *bitmap_file, int bitmap_chunk, int write_behind,
- int delay, int verbose, int autof)
++ int delay, int verbose, int autof, unsigned long long size)
{
/* Build a linear or raid0 arrays without superblocks
* We cannot really do any checks, we just do it.
int subdevs = 0, missing_disks = 0;
mddev_dev_t dv;
int bitmap_fd;
- unsigned long long size = ~0ULL;
unsigned long long bitmapsize;
+ int mdfd;
+ char chosen_name[1024];
+ int uuid[4] = {0,0,0,0};
+ struct map_ent *map = NULL;
/* scan all devices, make sure they really are block devices */
for (dv = devlist; dv; dv=dv->next) {
break;
}
+ /* We need to create the device. It can have no name. */
+ map_lock(&map);
+ mdfd = create_mddev(mddev, NULL, autof, LOCAL,
+ chosen_name);
+ if (mdfd < 0) {
+ map_unlock(&map);
+ return 1;
+ }
+ mddev = chosen_name;
+
+ map_update(&map, fd2devnum(mdfd), "none", uuid, chosen_name);
+ map_unlock(&map);
vers = md_get_version(mdfd);
if (vers >= 9000) {
mdu_array_info_t array;
array.level = level;
- array.size = 0;
+ array.size = size;
array.nr_disks = raiddisks;
array.raid_disks = raiddisks;
array.md_minor = 0;
if (ioctl(mdfd, SET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
mddev, strerror(errno));
- return 1;
+ goto abort;
}
} else if (bitmap_file) {
fprintf(stderr, Name ": bitmaps not supported with this kernel\n");
- return 1;
+ goto abort;
}
if (bitmap_file && level <= 0) {
fprintf(stderr, Name ": bitmaps not meaningful with level %s\n",
map_num(pers, level)?:"given");
- return 1;
+ goto abort;
}
/* now add the devices */
for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
(size == 0 || dsize < size))
size = dsize;
close(fd);
- if (vers>= 9000) {
+ if (vers >= 9000) {
mdu_disk_info_t disk;
disk.number = i;
disk.raid_disk = i;
if (bitmap_chunk == UnSet) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
- return 1;
+ goto abort;
}
#endif
if (vers < 9003) {
bitmapsize = size>>9; /* FIXME wrong for RAID10 */
if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
delay, write_behind, bitmapsize, major)) {
- return 1;
+ goto abort;
}
bitmap_fd = open(bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
fprintf(stderr, Name ": %s cannot be openned.",
bitmap_file);
- return 1;
+ goto abort;
}
}
if (bitmap_fd >= 0) {
if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
mddev, strerror(errno));
- return 1;
+ goto abort;
}
}
}
if (verbose >= 0)
fprintf(stderr, Name ": array %s built and started.\n",
mddev);
+ wait_for(mddev, mdfd);
+ close(mdfd);
return 0;
abort:
ioctl(mdfd, STOP_ARRAY, 0);
else
ioctl(mdfd, STOP_MD, 0);
+ close(mdfd);
return 1;
}
# e.g. make CXFLAGS=-O to optimise
TCC = tcc
UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found )
-DIET_GCC = diet gcc
+#DIET_GCC = diet gcc
+# sorry, but diet-libc doesn't know about posix_memalign,
+# so we cannot use it any more.
+DIET_GCC = gcc -DHAVE_STDINT_H
KLIBC=/home/src/klibc/klibc-0.77
CC = $(CROSS_COMPILE)gcc
CXFLAGS = -ggdb
CWFLAGS = -Wall -Werror -Wstrict-prototypes
+ifdef WARN_UNUSED
+CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O
+endif
ifdef DEBIAN
CPPFLAGS= -DDEBIAN
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
Incremental.o \
- mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o \
- mapfile.o
+ mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
+ restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
+ platform-intel.o probe_roms.o
+
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
Incremental.c \
- mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c \
- mapfile.c
+ mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
+ restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c \
+ platform-intel.c probe_roms.c
+
+MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o \
+ Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
+ super-ddf.o sha1.o crc32.o msg.o Monitor.o bitmap.o \
+ platform-intel.o probe_roms.o
+
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c dlink.c util.c \
- super0.c super1.c sha1.c sysfs.c
-ASSEMBLE_AUTO_SRCS := mdopen.c mdstat.c
+ super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
+ platform-intel.c probe_roms.c sysfs.c
+ASSEMBLE_AUTO_SRCS := mdopen.c
ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
ifdef MDASSEMBLE_AUTO
ASSEMBLE_SRCS += $(ASSEMBLE_AUTO_SRCS)
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
endif
-all : mdadm mdadm.man md.man mdadm.conf.man
+all : mdadm mdmon mdadm.man md.man mdadm.conf.man
everything: all mdadm.static swap_super test_stripe \
mdassemble mdassemble.auto mdassemble.static mdassemble.man \
mdadm.klibc : $(SRCS) mdadm.h
rm -f $(OBJS)
- gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
+ $(CC) -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
mdadm.Os : $(SRCS) mdadm.h
- gcc -o mdadm.Os $(CFLAGS) -DHAVE_STDINT_H -Os $(SRCS)
+ $(CC) -o mdadm.Os $(CFLAGS) -DHAVE_STDINT_H -Os $(SRCS)
mdadm.O2 : $(SRCS) mdadm.h
- gcc -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
+ $(CC) -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
+mdmon : $(MON_OBJS)
+ $(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS)
+msg.o: msg.c msg.h
+
test_stripe : restripe.c mdadm.h
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
mdassemble.man : mdassemble.8
nroff -man mdassemble.8 > mdassemble.man
-$(OBJS) : mdadm.h bitmap.h
+$(OBJS) : mdadm.h mdmon.h bitmap.h
+$(MON_OBJS) : mdadm.h mdmon.h bitmap.h
sha1.o : sha1.c sha1.h md5.h
$(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c
-install : mdadm install-man
+install : mdadm mdmon install-man install-udev
$(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
install-static : mdadm.static install-man
$(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm
$(INSTALL) -D -m 644 md.4 $(DESTDIR)$(MAN4DIR)/md.4
$(INSTALL) -D -m 644 mdadm.conf.5 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5
+install-udev: udev-md-raid.rules
+ $(INSTALL) -D -m 644 udev-md-raid.rules $(DESTDIR)/lib/udev/rules.d/64-md-raid.rules
+
uninstall:
rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 md.4 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm
-test: mdadm test_stripe swap_super
+test: mdadm mdmon test_stripe swap_super
@echo "Please run 'sh ./test' as root"
clean :
- rm -f mdadm $(OBJS) $(STATICOBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
+ rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
+ mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
mdadm.Os mdadm.O2 \
mdassemble mdassemble.static mdassemble.auto mdassemble.uclibc \
mdassemble.klibc swap_super \
#include "mdadm.h"
#include "md_u.h"
#include "md_p.h"
+#include <ctype.h>
#define REGISTER_DEV _IO (MD_MAJOR, 1)
#define START_MD _IO (MD_MAJOR, 2)
*
*/
mdu_array_info_t array;
+#ifndef MDASSEMBLE
+ struct mdinfo *mdi;
+#endif
if (md_get_version(fd) < 9000) {
fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
return 1;
}
+#ifndef MDASSEMBLE
+ /* If this is an externally-manage array, we need to modify the
+ * metadata_version so that mdmon doesn't undo our change.
+ */
+ mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
+ char vers[64];
+ strcpy(vers, "external:");
+ strcat(vers, mdi->text_version);
+ if (readonly > 0) {
+ int rv;
+ /* We set readonly ourselves. */
+ vers[9] = '-';
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ close(fd);
+ rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
+
+ if (rv < 0) {
+ fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+ devname, strerror(errno));
+
+ vers[9] = mdi->text_version[0];
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+ return 1;
+ }
+ } else {
+ char *cp;
+ /* We cannot set read/write - must signal mdmon */
+ vers[9] = '/';
+ sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ cp = strchr(vers+10, '/');
+ if (*cp)
+ *cp = 0;
+ ping_monitor(vers+10);
+ }
+ return 0;
+ }
+#endif
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": %s does not appear to be active.\n",
devname);
#ifndef MDASSEMBLE
+static void remove_devices(int devnum, char *path)
+{
+ /* Remove all 'standard' devices for 'devnum', including
+ * partitions. Also remove names at 'path' - possibly with
+ * partition suffixes - which link to those names.
+ */
+ char base[40];
+ char *path2;
+ char link[1024];
+ int n;
+ int part;
+ char *be;
+ char *pe;
+
+ if (devnum >= 0)
+ sprintf(base, "/dev/md%d", devnum);
+ else
+ sprintf(base, "/dev/md_d%d", -1-devnum);
+ be = base + strlen(base);
+ if (path) {
+ path2 = malloc(strlen(path)+20);
+ strcpy(path2, path);
+ pe = path2 + strlen(path2);
+ } else
+ path = NULL;
+
+ for (part = 0; part < 16; part++) {
+ if (part) {
+ sprintf(be, "p%d", part);
+ if (path) {
+ if (isdigit(pe[-1]))
+ sprintf(pe, "p%d", part);
+ else
+ sprintf(pe, "%d", part);
+ }
+ }
+ /* FIXME test if really is md device ?? */
+ unlink(base);
+ if (path) {
+ n = readlink(path2, link, sizeof(link));
+ if (n && strlen(base) == n &&
+ strncmp(link, base, n) == 0)
+ unlink(path2);
+ }
+ }
+}
+
+
int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
/* Run or stop the array. array must already be configured
* required >= 0.90.0
+ * Only print failure messages if quiet == 0;
+ * quiet > 0 means really be quiet
+ * quiet < 0 means we will try again if it fails.
*/
mdu_param_t param; /* unused */
if (runstop == -1 && md_get_version(fd) < 9000) {
if (ioctl(fd, STOP_MD, 0)) {
- if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n",
- devname, strerror(errno));
+ if (quiet == 0) fprintf(stderr,
+ Name ": stopping device %s "
+ "failed: %s\n",
+ devname, strerror(errno));
return 1;
}
}
} else if (runstop < 0){
struct map_ent *map = NULL;
struct stat stb;
- if (ioctl(fd, STOP_ARRAY, NULL)) {
- if (quiet==0) {
- fprintf(stderr, Name ": fail to stop array %s: %s\n",
+ struct mdinfo *mdi;
+ int devnum;
+ /* If this is an mdmon managed array, just write 'inactive'
+ * to the array state and let mdmon clear up.
+ */
+ devnum = fd2devnum(fd);
+ mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ if (mdi &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
+ /* This is mdmon managed. */
+ close(fd);
+ if (sysfs_set_str(mdi, NULL,
+ "array_state", "inactive") < 0) {
+ if (quiet == 0)
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+
+ /* Give monitor a chance to act */
+ ping_monitor(mdi->text_version);
+
+ fd = open(devname, O_RDONLY);
+ } else if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.minor_version == -2 &&
+ !is_subarray(mdi->text_version)) {
+ /* container, possibly mdmon-managed.
+ * Make sure mdmon isn't opening it, which
+ * would interfere with the 'stop'
+ */
+ ping_monitor(mdi->sys_name);
+ }
+
+ if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
+ if (quiet == 0) {
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
devname, strerror(errno));
if (errno == EBUSY)
fprintf(stderr, "Perhaps a running "
"process, mounted filesystem "
"or active volume group?\n");
}
+ if (mdi)
+ sysfs_free(mdi);
return 1;
}
+ /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+ * was stopped, so We'll do it here just to be sure. Drop any
+ * partitions as well...
+ */
+ if (fd >= 0)
+ ioctl(fd, BLKRRPART, 0);
+ if (mdi)
+ sysfs_uevent(mdi, "change");
+
+
+ if (devnum != NoMdDev &&
+ (stat("/dev/.udev", &stb) != 0 ||
+ check_env("MDADM_NO_UDEV"))) {
+ struct map_ent *mp = map_by_devnum(&map, devnum);
+ remove_devices(devnum, mp ? mp->path : NULL);
+ }
+
+
if (quiet <= 0)
fprintf(stderr, Name ": stopped %s\n", devname);
- if (fstat(fd, &stb) == 0) {
- int devnum;
- if (major(stb.st_rdev) == MD_MAJOR)
- devnum = minor(stb.st_rdev);
- else
- devnum = -1-(minor(stb.st_rdev)>>6);
+ if (devnum != NoMdDev) {
map_delete(&map, devnum);
map_write(map);
map_free(map);
struct supertype *st, *tst;
int duuid[4];
int ouuid[4];
+ int lfd = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": cannot get array info for %s\n",
unsigned long long ldsize;
char dvname[20];
char *dnprintable = dv->devname;
+ int err;
next = dv->next;
jnext = 0;
return 1;
case 'a':
/* add the device */
-
+ if (tst->subarray[0]) {
+ fprintf(stderr, Name ": Cannot add disks to a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ return 1;
+ }
/* Make sure it isn't in use (in 2.6 or later) */
- tfd = open(dv->devname, O_RDONLY|O_EXCL);
+ tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
if (tfd < 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dv->devname, strerror(errno));
}
close(tfd);
- if (array.major_version == 0 &&
+
+ if (!tst->ss->external &&
+ array.major_version == 0 &&
md_get_version(fd)%100 < 2) {
if (ioctl(fd, HOT_ADD_DISK,
(unsigned long)stb.st_rdev)==0) {
return 1;
}
- if (array.not_persistent == 0) {
+ if (array.not_persistent == 0 || tst->ss->external) {
/* need to find a sample superblock to copy, and
- * a spare slot to use
+ * a spare slot to use.
+ * For 'external' array (well, container based),
+ * We can just load the metadata for the array.
*/
- for (j = 0; j < tst->max_devs; j++) {
+ if (tst->ss->external) {
+ tst->ss->load_super(tst, fd, NULL);
+ } else for (j = 0; j < tst->max_devs; j++) {
char *dev;
int dfd;
disc.number = j;
close(dfd);
break;
}
+ /* FIXME this is a bad test to be using */
if (!tst->sb) {
fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
return 1;
fprintf(stderr, Name ": re-added %s\n", dv->devname);
continue;
}
+ if (errno == ENOMEM || errno == EROFS) {
+ fprintf(stderr, Name ": add new device failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ return 1;
+ }
/* fall back on normal-add */
}
}
disc.minor = minor(stb.st_rdev);
disc.number =j;
disc.state = 0;
- if (array.not_persistent==0) {
+ if (array.not_persistent==0 || tst->ss->external) {
+ int dfd;
if (dv->writemostly == 1)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- tst->ss->add_to_super(tst, &disc);
- if (tst->ss->write_init_super(tst, &disc,
- dv->devname))
+ dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname)) {
+ close(dfd);
+ return 1;
+ }
+ /* write_init_super will close 'dfd' */
+ if (tst->ss->external)
+ /* mdmon will write the metadata */
+ close(dfd);
+ else if (tst->ss->write_init_super(tst))
return 1;
} else if (dv->re_add) {
/* this had better be raid1.
}
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
- if (ioctl(fd,ADD_NEW_DISK, &disc)) {
+ if (tst->ss->external) {
+ /* add a disk to an external metadata container
+ * only if mdmon is around to see it
+ */
+ struct mdinfo new_mdi;
+ struct mdinfo *sra;
+ int container_fd;
+ int devnum = fd2devnum(fd);
+
+ container_fd = open_dev_excl(devnum);
+ if (container_fd < 0) {
+ fprintf(stderr, Name ": add failed for %s:"
+ " could not get exclusive access to container\n",
+ dv->devname);
+ return 1;
+ }
+
+ if (!mdmon_running(devnum)) {
+ fprintf(stderr, Name ": add failed for %s: mdmon not running\n",
+ dv->devname);
+ close(container_fd);
+ return 1;
+ }
+
+ sra = sysfs_read(container_fd, -1, 0);
+ if (!sra) {
+ fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
+ dv->devname);
+ close(container_fd);
+ return 1;
+ }
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ tst->ss->getinfo_super(tst, &new_mdi);
+ new_mdi.disk.major = disc.major;
+ new_mdi.disk.minor = disc.minor;
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ fprintf(stderr, Name ": add new device to external metadata"
+ " failed for %s\n", dv->devname);
+ close(container_fd);
+ return 1;
+ }
+ ping_monitor(devnum2devname(devnum));
+ sysfs_free(sra);
+ close(container_fd);
+ } else if (ioctl(fd, ADD_NEW_DISK, &disc)) {
fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
dv->devname, j, strerror(errno));
return 1;
case 'r':
/* hot remove */
+ if (tst->subarray[0]) {
+ fprintf(stderr, Name ": Cannot remove disks from a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ return 1;
+ }
+ if (tst->ss->external) {
+ /* To remove a device from a container, we must
+ * check that it isn't in use in an array.
+ * This involves looking in the 'holders'
+ * directory - there must be just one entry,
+ * the container.
+ * To ensure that it doesn't get used as a
+ * hold spare while we are checking, we
+ * get an O_EXCL open on the container
+ */
+ int dnum = fd2devnum(fd);
+ lfd = open_dev_excl(dnum);
+ if (lfd < 0) {
+ fprintf(stderr, Name
+ ": Cannot get exclusive access "
+ " to container - odd\n");
+ return 1;
+ }
+ /* in the detached case it is not possible to
+ * check if we are the unique holder, so just
+ * rely on the 'detached' checks
+ */
+ if (strcmp(dv->devname, "detached") == 0 ||
+ sysfs_unique_holder(dnum, stb.st_rdev))
+ /* pass */;
+ else {
+ fprintf(stderr, Name
+ ": %s is %s, cannot remove.\n",
+ dnprintable,
+ errno == EEXIST ? "still in use":
+ "not a member");
+ close(lfd);
+ return 1;
+ }
+ }
/* FIXME check that it is a current member */
- if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev)) {
+ err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
+ if (err && errno == ENODEV) {
+ /* Old kernels rejected this if no personality
+ * registered */
+ struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
+ struct mdinfo *dv = NULL;
+ if (sra)
+ dv = sra->devs;
+ for ( ; dv ; dv=dv->next)
+ if (dv->disk.major == major(stb.st_rdev) &&
+ dv->disk.minor == minor(stb.st_rdev))
+ break;
+ if (dv)
+ err = sysfs_set_str(sra, dv,
+ "state", "remove");
+ else
+ err = -1;
+ if (sra)
+ sysfs_free(sra);
+ }
+ if (err) {
fprintf(stderr, Name ": hot remove failed "
"for %s: %s\n", dnprintable,
strerror(errno));
+ if (lfd >= 0)
+ close(lfd);
return 1;
}
+ if (tst->ss->external) {
+ /*
+ * Before dropping our exclusive open we make an
+ * attempt at preventing mdmon from seeing an
+ * 'add' event before reconciling this 'remove'
+ * event.
+ */
+ char *name = devnum2devname(fd2devnum(fd));
+
+ if (!name) {
+ fprintf(stderr, Name ": unable to get container name\n");
+ return 1;
+ }
+
+ ping_manager(name);
+ free(name);
+ }
+ close(lfd);
if (verbose >= 0)
fprintf(stderr, Name ": hot removed %s\n",
dnprintable);
*/
unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
bitmap_info_t *info;
- char *buf, *unaligned;
+ void *buf;
int n, skip;
- unaligned = malloc(8192*2);
- buf = (char*) ((unsigned long)unaligned | 8191)+1;
+ if (posix_memalign(&buf, 512, 8192) != 0) {
+ fprintf(stderr, Name ": failed to allocate 8192 bytes\n");
+ return NULL;
+ }
n = read(fd, buf, 8192);
info = malloc(sizeof(*info));
fprintf(stderr, Name ": failed to read superblock of bitmap "
"file: %s\n", strerror(errno));
free(info);
- free(unaligned);
return NULL;
}
memcpy(&info->sb, buf, sizeof(info->sb));
int rv = 1;
char buf[64];
int swap;
+ __u32 uuid32[4];
info = bitmap_file_read(filename, brief, &st);
if (!info)
#else
swap = 1;
#endif
- if (swap) {
- printf(" UUID : %08x:%08x:%08x:%08x\n",
- swapl(*(__u32 *)(sb->uuid+0)),
- swapl(*(__u32 *)(sb->uuid+4)),
- swapl(*(__u32 *)(sb->uuid+8)),
- swapl(*(__u32 *)(sb->uuid+12)));
- } else {
- printf(" UUID : %08x:%08x:%08x:%08x\n",
- *(__u32 *)(sb->uuid+0),
- *(__u32 *)(sb->uuid+4),
- *(__u32 *)(sb->uuid+8),
- *(__u32 *)(sb->uuid+12));
- }
+ memcpy(uuid32, sb->uuid, 16);
+ if (swap)
+ printf(" UUID : %08x:%08x:%08x:%08x\n",
+ swapl(uuid32[0]),
+ swapl(uuid32[1]),
+ swapl(uuid32[2]),
+ swapl(uuid32[3]));
+ else
+ printf(" UUID : %08x:%08x:%08x:%08x\n",
+ uuid32[0],
+ uuid32[1],
+ uuid32[2],
+ uuid32[3]);
+
printf(" Events : %llu\n", (unsigned long long)sb->events);
printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared);
printf(" State : %s\n", bitmap_state(sb->state));
char *homehost = NULL;
char sys_hostname[256];
+ int require_homehost = 1;
char *mailaddr = NULL;
char *program = NULL;
int delay = 0;
ident.bitmap_fd = -1;
ident.bitmap_file = NULL;
ident.name[0] = 0;
+ ident.container = NULL;
+ ident.member = NULL;
while ((option_index = -1) ,
(opt=getopt_long(argc, argv,
shortopt, long_options,
&option_index)) != -1) {
int newmode = mode;
- /* firstly, some mode-independant options */
+ /* firstly, some mode-independent options */
switch(opt) {
case 'h':
if (option_index > 0 &&
continue;
case HomeHost:
- homehost = optarg;
+ if (strcasecmp(optarg, "<ignore>") == 0)
+ require_homehost = 0;
+ else
+ homehost = optarg;
continue;
case ':':
case 'o':
case 'w':
case 'W':
+ case Waitclean:
+ case DetailPlatform:
case 'K': if (!mode) newmode = MISC; break;
}
if (mode && newmode == mode) {
dv->writemostly = writemostly;
dv->re_add = re_add;
dv->used = 0;
+ dv->content = NULL;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
dv->disposition = devmode;
dv->writemostly = writemostly;
dv->re_add = re_add;
+ dv->used = 0;
+ dv->content = NULL;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
case O(GROW,'z'):
- case O(CREATE,'z'): /* size */
+ case O(CREATE,'z'):
+ case O(BUILD,'z'): /* size */
if (size >= 0) {
fprintf(stderr, Name ": size may only be specified once. "
"Second value is %s.\n", optarg);
exit(2);
case 5:
- case 6:
layout = map_name(r5layout, optarg);
if (layout==UnSet) {
fprintf(stderr, Name ": layout %s not understood for raid5.\n",
exit(2);
}
break;
+ case 6:
+ layout = map_name(r6layout, optarg);
+ if (layout==UnSet) {
+ fprintf(stderr, Name ": layout %s not understood for raid6.\n",
+ optarg);
+ exit(2);
+ }
+ break;
case 10:
/* 'f', 'o' or 'n' followed by a number <= raid_disks */
" 'summaries', 'homehost', 'byteorder', 'devicesize'.\n");
exit(outf == stdout ? 0 : 2);
+ case O(INCREMENTAL,NoDegraded):
case O(ASSEMBLE,NoDegraded): /* --no-degraded */
runstop = -1; /* --stop isn't allowed for --assemble,
* so we overload slightly */
case O(MISC,'o'):
case O(MISC,'w'):
case O(MISC,'W'):
+ case O(MISC, Waitclean):
+ case O(MISC, DetailPlatform):
if (devmode && devmode != opt &&
(devmode == 'E' || (opt == 'E' && devmode != 'Q'))) {
fprintf(stderr, Name ": --examine/-E cannot be given with -%c\n",
fprintf(stderr, Name ": --super-minor=dev is incompatible with --auto\n");
exit(2);
}
- if (mode == MANAGE || mode == GROW)
- autof=1; /* Don't create */
- mdfd = open_mddev(devlist->devname, autof);
- if (mdfd < 0)
+ if (mode == MANAGE || mode == GROW) {
+ mdfd = open_mddev(devlist->devname, 1);
+ if (mdfd < 0)
+ exit(1);
+ } else
+ /* non-existent device is OK */
+ mdfd = open_mddev(devlist->devname, 0);
+ if (mdfd == -2) {
+ fprintf(stderr, Name ": device %s exists but is not an "
+ "md array.\n", devlist->devname);
exit(1);
+ }
if ((int)ident.super_minor == -2) {
struct stat stb;
+ if (mdfd < 0) {
+ fprintf(stderr, Name ": --super-minor=dev given, and "
+ "listed device %s doesn't exist.\n",
+ devlist->devname);
+ exit(1);
+ }
fstat(mdfd, &stb);
ident.super_minor = minor(stb.st_rdev);
}
+ if (mdfd >= 0 && mode != MANAGE && mode != GROW) {
+ /* We don't really want this open yet, we just might
+ * have wanted to check some things
+ */
+ close(mdfd);
+ mdfd = -1;
+ }
}
if (raiddisks) {
}
if (homehost == NULL)
- homehost = conf_get_homehost();
- if (homehost && strcmp(homehost, "<system>")==0) {
+ homehost = conf_get_homehost(&require_homehost);
+ if (homehost == NULL || strcmp(homehost, "<system>")==0) {
if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
sys_hostname[sizeof(sys_hostname)-1] = 0;
homehost = sys_hostname;
}
}
+ ident.autof = autof;
+
rv = 0;
switch(mode) {
case MANAGE:
fprintf(stderr, Name ": %s not identified in config file.\n",
devlist->devname);
rv |= 1;
- } else {
- mdfd = open_mddev(devlist->devname,
- array_ident->autof ? array_ident->autof : autof);
- if (mdfd < 0)
- rv |= 1;
- else {
- rv |= Assemble(ss, devlist->devname, mdfd, array_ident,
- NULL, backup_file,
- readonly, runstop, update, homehost, verbose-quiet, force);
+ if (mdfd >= 0)
close(mdfd);
- }
+ } else {
+ if (array_ident->autof == 0)
+ array_ident->autof = autof;
+ rv |= Assemble(ss, devlist->devname, array_ident,
+ NULL, backup_file,
+ readonly, runstop, update,
+ homehost, require_homehost,
+ verbose-quiet, force);
}
} else if (!scan)
- rv = Assemble(ss, devlist->devname, mdfd, &ident,
+ rv = Assemble(ss, devlist->devname, &ident,
devlist->next, backup_file,
- readonly, runstop, update, homehost, verbose-quiet, force);
+ readonly, runstop, update,
+ homehost, require_homehost,
+ verbose-quiet, force);
else if (devs_found>0) {
if (update && devs_found > 1) {
fprintf(stderr, Name ": can only update a single array at a time\n");
rv |= 1;
continue;
}
- mdfd = open_mddev(dv->devname,
- array_ident->autof ?array_ident->autof : autof);
- if (mdfd < 0) {
- rv |= 1;
- continue;
- }
- rv |= Assemble(ss, dv->devname, mdfd, array_ident,
+ if (array_ident->autof == 0)
+ array_ident->autof = autof;
+ rv |= Assemble(ss, dv->devname, array_ident,
NULL, backup_file,
- readonly, runstop, update, homehost, verbose-quiet, force);
- close(mdfd);
+ readonly, runstop, update,
+ homehost, require_homehost,
+ verbose-quiet, force);
}
} else {
mddev_ident_t array_list = conf_get_ident(NULL);
exit(1);
}
for (; array_list; array_list = array_list->next) {
- mdu_array_info_t array;
- mdfd = open_mddev(array_list->devname,
- array_list->autof ? array_list->autof : autof);
- if (mdfd < 0) {
- rv |= 1;
+ if (array_list->devname &&
+ strcasecmp(array_list->devname, "<ignore>") == 0)
continue;
- }
- if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0)
- /* already assembled, skip */
- cnt++;
- else {
- rv |= Assemble(ss, array_list->devname, mdfd,
- array_list,
- NULL, NULL,
- readonly, runstop, NULL, homehost, verbose-quiet, force);
- if (rv == 0) cnt++;
- }
- close(mdfd);
- }
- if (homehost) {
+ if (array_list->autof == 0)
+ array_list->autof = autof;
+
+ rv |= Assemble(ss, array_list->devname,
+ array_list,
+ NULL, NULL,
+ readonly, runstop, NULL,
+ homehost, require_homehost,
+ verbose-quiet, force);
+ cnt++;
+ }
+ if (homehost && cnt == 0) {
/* Maybe we can auto-assemble something.
- * Repeatedly call Assemble in auto-assmble mode
+ * Repeatedly call Assemble in auto-assemble mode
* until it fails
*/
int rv2;
mddev_dev_t devlist = conf_get_devs();
acnt = 0;
do {
- rv2 = Assemble(ss, NULL, -1,
+ rv2 = Assemble(ss, NULL,
&ident,
devlist, NULL,
- readonly, runstop, NULL, homehost, verbose-quiet, force);
+ readonly, runstop, NULL,
+ homehost, require_homehost,
+ verbose-quiet, force);
if (rv2==0) {
cnt++;
acnt++;
do {
acnt = 0;
do {
- rv2 = Assemble(ss, NULL, -1,
+ rv2 = Assemble(ss, NULL,
&ident,
NULL, NULL,
- readonly, runstop, "homehost", homehost, verbose-quiet, force);
+ readonly, runstop, "homehost",
+ homehost, require_homehost,
+ verbose-quiet, force);
if (rv2==0) {
cnt++;
acnt++;
break;
}
}
- rv = Build(devlist->devname, mdfd, chunk, level, layout,
+ rv = Build(devlist->devname, chunk, level, layout,
raiddisks, devlist->next, assume_clean,
- bitmap_file, bitmap_chunk, write_behind, delay,
- verbose-quiet, size);
+ bitmap_file, bitmap_chunk, write_behind,
- delay, verbose-quiet, autof);
++ delay, verbose-quiet, autof, size);
break;
case CREATE:
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
break;
}
- rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
+ rv = Create(ss, devlist->devname, chunk, level, layout, size<0 ? 0 : size,
raiddisks, sparedisks, ident.name, homehost,
ident.uuid_set ? ident.uuid : NULL,
devs_found-1, devlist->next, runstop, verbose-quiet, force, assume_clean,
- bitmap_file, bitmap_chunk, write_behind, delay);
+ bitmap_file, bitmap_chunk, write_behind, delay, autof);
break;
case MISC:
if (devmode == 'E') {
rv = Examine(devlist, scan?(verbose>1?0:verbose+1):brief,
export, scan,
SparcAdjust, ss, homehost);
+ } else if (devmode == DetailPlatform) {
+ rv = Detail_Platform(ss ? ss->ss : NULL, ss ? scan : 1, verbose);
} else {
if (devlist == NULL) {
- if (devmode=='D' && scan) {
- /* apply --detail to all devices in /proc/mdstat */
+ if ((devmode=='D' || devmode == Waitclean) && scan) {
+ /* apply --detail or --wait-clean to
+ * all devices in /proc/mdstat
+ */
struct mdstat_ent *ms = mdstat_read(0, 1);
struct mdstat_ent *e;
+ struct map_ent *map = NULL;
+ int v = verbose>1?0:verbose+1;
+
for (e=ms ; e ; e=e->next) {
- char *name = get_md_name(e->devnum);
+ char *name;
+ struct map_ent *me;
+ me = map_by_devnum(&map, e->devnum);
+ if (me && me->path
+ && strcmp(me->path, "/unknown") != 0)
+ name = me->path;
+ else
+ name = get_md_name(e->devnum);
if (!name) {
fprintf(stderr, Name ": cannot find device file for %s\n",
e->dev);
continue;
}
- rv |= Detail(name, verbose>1?0:verbose+1,
- export, test, homehost);
+ if (devmode == 'D')
+ rv |= Detail(name, v,
+ export, test,
+ homehost);
+ else
+ rv |= WaitClean(name, v);
put_md_name(name);
}
free_mdstat(ms);
export, test, homehost);
continue;
case 'K': /* Zero superblock */
- rv |= Kill(dv->devname, force, quiet); continue;
+ rv |= Kill(dv->devname, force, quiet,0);
+ continue;
case 'Q':
rv |= Query(dv->devname); continue;
case 'X':
rv |= ExamineBitmap(dv->devname, brief, ss); continue;
case 'W':
rv |= Wait(dv->devname); continue;
+ case Waitclean:
+ rv |= WaitClean(dv->devname, verbose-quiet); continue;
}
mdfd = open_mddev(dv->devname, 1);
if (mdfd>=0) {
break;
}
rv = Incremental(devlist->devname, verbose-quiet, runstop,
- ss, homehost, autof);
+ ss, homehost, require_homehost, autof);
break;
case AUTODETECT:
autodetect();
#include "md_u.h"
#include "md_p.h"
#include "bitmap.h"
+#include "msg.h"
#include <endian.h>
/* Redhat don't like to #include <asm/byteorder.h>, and
#define __le16_to_cpu(_x) (_x)
#define __le32_to_cpu(_x) (_x)
#define __le64_to_cpu(_x) (_x)
+
+#define __cpu_to_be16(_x) bswap_16(_x)
+#define __cpu_to_be32(_x) bswap_32(_x)
+#define __cpu_to_be64(_x) bswap_64(_x)
+#define __be16_to_cpu(_x) bswap_16(_x)
+#define __be32_to_cpu(_x) bswap_32(_x)
+#define __be64_to_cpu(_x) bswap_64(_x)
#elif BYTE_ORDER == BIG_ENDIAN
#define __cpu_to_le16(_x) bswap_16(_x)
#define __cpu_to_le32(_x) bswap_32(_x)
#define __le16_to_cpu(_x) bswap_16(_x)
#define __le32_to_cpu(_x) bswap_32(_x)
#define __le64_to_cpu(_x) bswap_64(_x)
+
+#define __cpu_to_be16(_x) (_x)
+#define __cpu_to_be32(_x) (_x)
+#define __cpu_to_be64(_x) (_x)
+#define __be16_to_cpu(_x) (_x)
+#define __be32_to_cpu(_x) (_x)
+#define __be64_to_cpu(_x) (_x)
#else
# error "unknown endianness."
#endif
int uuid[4];
char name[33];
unsigned long long data_offset;
- unsigned long long component_size;
+ unsigned long long component_size; /* same as array.size, except in
+ * sectors and up to 64bits.
+ */
+ unsigned long long custom_array_size; /* size for non-default sized
+ * arrays (in sectors)
+ */
int reshape_active;
unsigned long long reshape_progress;
+ unsigned long long resync_start;
+ unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
int cache_size; /* size of raid456 stripe cache*/
int mismatch_cnt;
char text_version[50];
+ int container_member; /* for assembling external-metatdata arrays
+ * This is to be used internally by metadata
+ * handler only */
+
char sys_name[20];
struct mdinfo *devs;
struct mdinfo *next;
+
+ /* Device info for mdmon: */
+ int state_fd;
+ #define DS_FAULTY 1
+ #define DS_INSYNC 2
+ #define DS_WRITE_MOSTLY 4
+ #define DS_SPARE 8
+ #define DS_BLOCKED 16
+ #define DS_REMOVE 1024
+ #define DS_UNBLOCK 2048
+ int prev_state, curr_state, next_state;
+
};
struct createinfo {
AutoHomeHost,
Symlinks,
AutoDetect,
+ Waitclean,
+ DetailPlatform,
};
/* structures read from config file */
char *bitmap_file;
int bitmap_fd;
+ char *container; /* /dev/whatever name of container, or
+ * uuid of container. You would expect
+ * this to be the 'devname' or UUID
+ * of some other entry.
+ */
+ char *member; /* subarray within a container */
+
struct mddev_ident_s *next;
} *mddev_ident_t;
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
char re_add;
char used; /* set when used */
+ struct mdinfo *content; /* If devname is a container, this might list
+ * the remaining member arrays. */
struct mddev_dev_s *next;
} *mddev_dev_t;
char *pattern; /* U or up, _ for down */
int percent; /* -1 if no resync */
int resync; /* 1 if resync, 0 if recovery */
+ int devcnt;
+ int raid_disks;
+ int chunk_size;
+ char * metadata_version;
struct mdstat_ent *next;
};
extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void free_mdstat(struct mdstat_ent *ms);
extern void mdstat_wait(int seconds);
+extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
extern int mddev_busy(int devnum);
struct map_ent {
struct map_ent *next;
int devnum;
- int major,minor;
+ char metadata[20];
int uuid[4];
+ int bad;
char *path;
};
-extern int map_update(struct map_ent **mpp, int devnum, int major, int minor,
+extern int map_update(struct map_ent **mpp, int devnum, char *metadata,
int uuid[4], char *path);
extern struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]);
+extern struct map_ent *map_by_devnum(struct map_ent **map, int devnum);
+extern struct map_ent *map_by_name(struct map_ent **map, char *name);
extern void map_read(struct map_ent **melp);
extern int map_write(struct map_ent *mel);
extern void map_delete(struct map_ent **mapp, int devnum);
extern void map_free(struct map_ent *map);
extern void map_add(struct map_ent **melp,
- int devnum, int major, int minor, int uuid[4], char *path);
+ int devnum, char *metadata, int uuid[4], char *path);
+extern int map_lock(struct map_ent **melp);
+extern void map_unlock(struct map_ent **melp);
/* various details can be requested */
-#define GET_LEVEL 1
-#define GET_LAYOUT 2
-#define GET_COMPONENT 4
-#define GET_CHUNK 8
-#define GET_CACHE 16
-#define GET_MISMATCH 32
-#define GET_VERSION 64
-
-#define GET_DEVS 1024 /* gets role, major, minor */
-#define GET_OFFSET 2048
-#define GET_SIZE 4096
-#define GET_STATE 8192
-#define GET_ERROR 16384
+enum sysfs_read_flags {
+ GET_LEVEL = (1 << 0),
+ GET_LAYOUT = (1 << 1),
+ GET_COMPONENT = (1 << 2),
+ GET_CHUNK = (1 << 3),
+ GET_CACHE = (1 << 4),
+ GET_MISMATCH = (1 << 5),
+ GET_VERSION = (1 << 6),
+ GET_DISKS = (1 << 7),
+ GET_DEGRADED = (1 << 8),
+ GET_SAFEMODE = (1 << 9),
+ GET_DEVS = (1 << 10), /* gets role, major, minor */
+ GET_OFFSET = (1 << 11),
+ GET_SIZE = (1 << 12),
+ GET_STATE = (1 << 13),
+ GET_ERROR = (1 << 14),
+ SKIP_GONE_DEVS = (1 << 15),
+};
/* If fd >= 0, get the array it is open on,
* else use devnum. >=0 -> major9. <0.....
*/
+extern int sysfs_open(int devnum, char *devname, char *attr);
+extern void sysfs_init(struct mdinfo *mdi, int fd, int devnum);
extern void sysfs_free(struct mdinfo *sra);
extern struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options);
+extern int sysfs_attr_match(const char *attr, const char *str);
+extern int sysfs_match_word(const char *word, char **list);
extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val);
extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long val);
+extern int sysfs_uevent(struct mdinfo *sra, char *event);
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
+extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, char *val, int size);
+extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
+extern int sysfs_set_array(struct mdinfo *info, int vers);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd,
+ int in_sync);
+extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
+extern int sysfs_unique_holder(int devnum, long rdev);
+extern int load_sys(char *path, char *buf);
extern int save_stripes(int *source, unsigned long long *offsets,
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], pers[], modes[], faultylayout[];
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
extern char *map_dev(int major, int minor, int create);
+struct active_array;
+struct metadata_update;
+/* A superswitch provides entry point the a metadata handler.
+ *
+ * The super_switch primarily operates on some "metadata" that
+ * is accessed via the 'supertype'.
+ * This metadata has one of three possible sources.
+ * 1/ It is read from a single device. In this case it may not completely
+ * describe the array or arrays as some information might be on other
+ * devices.
+ * 2/ It is read from all devices in a container. In this case all
+ * information is present.
+ * 3/ It is created by ->init_super / ->add_to_super. In this case it will
+ * be complete once enough ->add_to_super calls have completed.
+ *
+ * When creating an array inside a container, the metadata will be
+ * formed by a combination of 2 and 3. The metadata or the array is read,
+ * then new information is added.
+ *
+ * The metadata must sometimes have a concept of a 'current' array
+ * and a 'current' device.
+ * The 'current' array is set by init_super to be the newly created array,
+ * or is set by super_by_fd when it finds it is looking at an array inside
+ * a container.
+ *
+ * The 'current' device is either the device that the metadata was read from
+ * in case 1, or the last device added by add_to_super in case 3.
+ * Case 2 does not identify a 'current' device.
+ */
extern struct superswitch {
+
+ /* Used to report details of metadata read from a component
+ * device. ->load_super has been called.
+ */
void (*examine_super)(struct supertype *st, char *homehost);
- void (*brief_examine_super)(struct supertype *st);
+ void (*brief_examine_super)(struct supertype *st, int verbose);
void (*export_examine_super)(struct supertype *st);
+
+ /* Used to report details of an active array.
+ * ->load_super was possibly given a 'component' string.
+ */
void (*detail_super)(struct supertype *st, char *homehost);
void (*brief_detail_super)(struct supertype *st);
void (*export_detail_super)(struct supertype *st);
+
+ /* Optional: platform hardware / firmware details */
+ int (*detail_platform)(int verbose, int enumerate_only);
+
+ /* Used:
+ * to get uuid to storing in bitmap metadata
+ * and 'reshape' backup-data metadata
+ * To see if a device is being re-added to an array it was part of.
+ */
void (*uuid_from_super)(struct supertype *st, int uuid[4]);
+
+ /* Extract generic details from metadata. This could be details about
+ * the container, or about an individual array within the container.
+ * The determination is made either by:
+ * load_super being given a 'component' string.
+ * validate_geometry determining what to create.
+ * The info includes both array information and device information.
+ * The particular device should be:
+ * The last device added by add_to_super
+ * The device the metadata was loaded from by load_super
+ */
void (*getinfo_super)(struct supertype *st, struct mdinfo *info);
+
+ /* Check if the given metadata is flagged as belonging to "this"
+ * host. 0 for 'no', 1 for 'yes', -1 for "Don't record homehost"
+ */
int (*match_home)(struct supertype *st, char *homehost);
+
+ /* Make one of several generic modifications to metadata
+ * prior to assembly (or other times).
+ * sparc2.2 - first bug in early 0.90 metadata
+ * super-minor - change name of 0.90 metadata
+ * summaries - 'correct' any redundant data
+ * resync - mark array as dirty to trigger a resync.
+ * uuid - set new uuid - only 0.90 or 1.x
+ * name - change the name of the array (where supported)
+ * homehost - change which host this array is tied to.
+ * devicesize - If metadata is at start of device, change recorded
+ * device size to match actual device size
+ * byteorder - swap bytes for 0.90 metadata
+ *
+ * force-one - mark that device as uptodate, not old or failed.
+ * force-array - mark array as clean if it would not otherwise
+ * assemble
+ * assemble - not sure how this is different from force-one...
+ * linear-grow-new - add a new device to a linear array, but don't
+ * change the size: so superblock still matches
+ * linear-grow-update - now change the size of the array.
+ */
int (*update_super)(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
int uuid_set, char *homehost);
+
+ /* Create new metadata for new array as described. This could
+ * be a new container, or an array in a pre-existing container.
+ * Also used to zero metadata prior to writing it to invalidate old
+ * metadata.
+ */
int (*init_super)(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *name,
char *homehost, int *uuid);
- void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo);
+
+ /* update the metadata to include new device, either at create or
+ * when hot-adding a spare.
+ */
+ int (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+ int fd, char *devname);
+
+ /* Write metadata to one device when fixing problems or adding
+ * a new device.
+ */
int (*store_super)(struct supertype *st, int fd);
- int (*write_init_super)(struct supertype *st, mdu_disk_info_t *dinfo,
- char *devname);
+
+ /* Write all metadata for this array.
+ */
+ int (*write_init_super)(struct supertype *st);
int (*compare_super)(struct supertype *st, struct supertype *tst);
int (*load_super)(struct supertype *st, int fd, char *devname);
struct supertype * (*match_metadata_desc)(char *arg);
void (*locate_bitmap)(struct supertype *st, int fd);
int (*write_bitmap)(struct supertype *st, int fd);
void (*free_super)(struct supertype *st);
- int major;
+
+ /* validate_geometry is called with an st returned by
+ * match_metadata_desc.
+ * It should check that the geometry described in compatible with
+ * the metadata type. It will be called repeatedly as devices
+ * added to validate changing size and new devices. If there are
+ * inter-device dependencies, it should record sufficient details
+ * so these can be validated.
+ * Both 'size' and '*freesize' are in sectors. chunk is bytes.
+ */
+ int (*validate_geometry)(struct supertype *st, int level, int layout,
+ int raiddisks,
+ int chunk, unsigned long long size,
+ char *subdev, unsigned long long *freesize,
+ int verbose);
+
+ struct mdinfo *(*container_content)(struct supertype *st);
+ /* Allow a metadata handler to override mdadm's default layouts */
+ int (*default_layout)(int level); /* optional */
+
+/* for mdmon */
+ int (*open_new)(struct supertype *c, struct active_array *a,
+ char *inst);
+
+ /* Tell the metadata handler the current state of the array.
+ * This covers whether it is known to be consistent (no pending writes)
+ * and how far along a resync is known to have progressed
+ * (in a->resync_start).
+ * resync status is really irrelevant if the array is not consistent,
+ * but some metadata (DDF!) have a place to record the distinction.
+ * If 'consistent' is '2', then the array can mark it dirty if a
+ * resync/recovery/whatever is required, or leave it clean if not.
+ * Return value is 0 dirty (not consistent) and 1 if clean.
+ * it is only really important if consistent is passed in as '2'.
+ */
+ int (*set_array_state)(struct active_array *a, int consistent);
+
+ /* When the state of a device might have changed, we call set_disk to
+ * tell the metadata what the current state is.
+ * Typically this happens on spare->in_sync and (spare|in_sync)->faulty
+ * transitions.
+ * set_disk might be called when the state of the particular disk has
+ * not in fact changed.
+ */
+ void (*set_disk)(struct active_array *a, int n, int state);
+ void (*sync_metadata)(struct supertype *st);
+ void (*process_update)(struct supertype *st,
+ struct metadata_update *update);
+ void (*prepare_update)(struct supertype *st,
+ struct metadata_update *update);
+
+ /* activate_spare will check if the array is degraded and, if it
+ * is, try to find some spare space in the container.
+ * On success, it add appropriate updates (For process_update) to
+ * to the 'updates' list and returns a list of 'mdinfo' identifying
+ * the device, or devices as there might be multiple missing
+ * devices and multiple spares available.
+ */
+ struct mdinfo *(*activate_spare)(struct active_array *a,
+ struct metadata_update **updates);
+
int swapuuid; /* true if uuid is bigending rather than hostendian */
-} super0, super1, *superlist[];
+ int external;
+ const char *name; /* canonical metadata name */
+} super0, super1, super_ddf, *superlist[];
+
+extern struct superswitch super_imsm;
+
+struct metadata_update {
+ int len;
+ char *buf;
+ void *space; /* allocated space that monitor will use */
+ struct metadata_update *next;
+};
+/* A supertype holds a particular collection of metadata.
+ * It identifies the metadata type by the superswitch, and the particular
+ * sub-version of that metadata type.
+ * metadata read in or created is stored in 'sb' and 'info'.
+ * There are also fields used by mdmon to track containers.
+ *
+ * A supertype may refer to:
+ * Just an array, possibly in a container
+ * A container, not identifying any particular array
+ * Info read from just one device, not yet fully describing the array/container.
+ *
+ *
+ * A supertype is created by:
+ * super_by_fd
+ * guess_super
+ * dup_super
+ */
struct supertype {
struct superswitch *ss;
int minor_version;
int max_devs;
+ int container_dev; /* devnum of container */
+ char subarray[32]; /* name of array inside container */
void *sb;
+ void *info;
+ int loaded_container; /* Set if load_super found a container,
+ * not just one device */
+
+ struct metadata_update *updates;
+ struct metadata_update **update_tail;
+
+ /* extra stuff used by mdmon */
+ struct active_array *arrays;
+ int sock; /* listen to external programs */
+ int devnum;
+ char *devname; /* e.g. md0. This appears in metadata_verison:
+ * external:/md0/12
+ */
+ int devcnt;
+
+ struct mdinfo *devs;
+
};
extern struct supertype *super_by_fd(int fd);
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
extern void get_one_disk(int mdfd, mdu_array_info_t *ainf,
mdu_disk_info_t *disk);
+void wait_for(char *dev, int fd);
#if __GNUC__ < 3
struct stat64;
int *fdlist, int cnt, char *backup_file);
-extern int Assemble(struct supertype *st, char *mddev, int mdfd,
+extern int Assemble(struct supertype *st, char *mddev,
mddev_ident_t ident,
mddev_dev_t devlist, char *backup_file,
int readonly, int runstop,
- char *update, char *homehost,
+ char *update, char *homehost, int require_homehost,
int verbose, int force);
-extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
- int raiddisks,
- mddev_dev_t devlist, int assume_clean,
+extern int Build(char *mddev, int chunk, int level, int layout,
+ int raiddisks, mddev_dev_t devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int write_behind,
- int delay, int verbose, int autof);
- int delay, int verbose, unsigned long long size);
++ int delay, int verbose, int autof, unsigned long long size);
-extern int Create(struct supertype *st, char *mddev, int mdfd,
+extern int Create(struct supertype *st, char *mddev,
int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof);
extern int Detail(char *dev, int brief, int export, int test, char *homehost);
+extern int Detail_Platform(struct superswitch *ss, int scan, int verbose);
extern int Query(char *dev);
extern int Examine(mddev_dev_t devlist, int brief, int export, int scan,
int SparcAdjust, struct supertype *forcest, char *homehost);
int period, int daemonise, int scan, int oneshot,
int dosyslog, int test, char *pidfile);
-extern int Kill(char *dev, int force, int quiet);
+extern int Kill(char *dev, int force, int quiet, int noexcl);
extern int Wait(char *dev);
+extern int WaitClean(char *dev, int verbose);
extern int Incremental(char *devname, int verbose, int runstop,
- struct supertype *st, char *homehost, int autof);
+ struct supertype *st, char *homehost, int require_homehost,
+ int autof);
+extern int Incremental_container(struct supertype *st, char *devname,
+ int verbose, int runstop, int autof,
+ int trustworthy);
extern void RebuildMap(void);
extern int IncrementalScan(int verbose);
extern int get_mdp_major(void);
extern int dev_open(char *dev, int flags);
+extern int open_dev(int devnum);
+extern int open_dev_excl(int devnum);
extern int is_standard(char *dev, int *nump);
+extern int same_dev(char *one, char *two);
extern int parse_auto(char *str, char *msg, int config);
extern mddev_ident_t conf_get_ident(char *dev);
extern mddev_dev_t conf_get_devs(void);
extern int conf_test_dev(char *devname);
+extern int conf_test_metadata(const char *version);
extern struct createinfo *conf_get_create_info(void);
extern void set_conffile(char *file);
extern char *conf_get_mailaddr(void);
extern char *conf_get_mailfrom(void);
extern char *conf_get_program(void);
-extern char *conf_get_homehost(void);
+extern char *conf_get_homehost(int *require_homehostp);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
+extern int conf_name_is_free(char *name);
+extern int devname_matches(char *name, char *match);
+extern struct mddev_ident_s *conf_match(struct mdinfo *info, struct supertype *st);
+
extern void free_line(char *line);
extern int match_oneof(char *devices, char *devname);
extern void uuid_from_super(int uuid[4], mdp_super_t *super);
+extern const int uuid_match_any[4];
extern int same_uuid(int a[4], int b[4], int swapuuid);
extern void copy_uuid(void *a, int b[4], int swapuuid);
+extern char *fname_from_uuid(struct supertype *st,
+ struct mdinfo *info, char *buf, char sep);
extern unsigned long calc_csum(void *super, int bytes);
extern int enough(int level, int raid_disks, int layout, int clean,
char *avail, int avail_disks);
extern int ask(char *mesg);
extern unsigned long long get_component_size(int fd);
extern void remove_partitions(int fd);
-
+extern unsigned long long calc_array_size(int level, int raid_disks, int layout,
+ int chunksize, unsigned long long devsize);
+extern int flush_metadata_updates(struct supertype *st);
+extern void append_metadata_update(struct supertype *st, void *buf, int len);
+extern int assemble_container_content(struct supertype *st, int mdfd,
+ struct mdinfo *content, int runstop,
+ char *chosen_name, int verbose);
+
+extern int add_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info);
+extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
extern char *human_size(long long bytes);
extern char *human_size_brief(long long bytes);
extern char DefaultConfFile[];
-extern int open_mddev(char *dev, int autof);
-extern int open_mddev_devnum(char *devname, int devnum, char *name,
- char *chosen_name, int parts);
+extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
+ char *chosen);
+/* values for 'trustworthy' */
+#define LOCAL 1
+#define FOREIGN 2
+#define METADATA 3
+extern int open_mddev(char *dev, int report_errors);
+extern int open_container(int fd);
+
+extern int mdmon_running(int devnum);
+extern int signal_mdmon(int devnum);
+extern int check_env(char *name);
+extern int start_mdmon(int devnum);
+
+extern char *devnum2devname(int num);
+extern int devname2devnum(char *name);
+extern int stat2devnum(struct stat *st);
+extern int fd2devnum(int fd);
+
+static inline int dev2major(int d)
+{
+ if (d >= 0)
+ return MD_MAJOR;
+ else
+ return get_mdp_major();
+}
+
+static inline int dev2minor(int d)
+{
+ if (d >= 0)
+ return d;
+ return (-1-d) << MdpMinorShift;
+}
+
+static inline int ROUND_UP(int a, int base)
+{
+ return ((a+base-1)/base)*base;
+}
+
+static inline int is_subarray(char *vers)
+{
+ /* The version string for a 'subarray' (an array in a container)
+ * is
+ * /containername/componentname for normal read-write arrays
+ * -containername/componentname for read-only arrays.
+ * containername is e.g. md0, md_d1
+ * componentname is dependant on the metadata. e.g. '1' 'S1' ...
+ */
+ return (*vers == '/' || *vers == '-');
+}
+#ifdef DEBUG
+#define dprintf(fmt, arg...) \
+ fprintf(stderr, fmt, ##arg)
+#else
+#define dprintf(fmt, arg...) \
+ ({ if (0) fprintf(stderr, fmt, ##arg); 0; })
+#endif
#include <assert.h>
#include <stdarg.h>
static inline int xasprintf(char **strp, const char *fmt, ...) {
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
+/* kernel module doesn't know about these */
+#define LEVEL_CONTAINER (-100)
+#define LEVEL_UNSUPPORTED (-200)
+
/* faulty stuff */
#define makedev(M,m) (((M)<<8) | (m))
#endif
-/* for raid5 */
+/* for raid4/5/6 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
#define ALGORITHM_LEFT_SYMMETRIC 2
#define ALGORITHM_RIGHT_SYMMETRIC 3
+
+/* Define non-rotating (raid4) algorithms. These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, or order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
+
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6 16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
+#define ALGORITHM_LEFT_SYMMETRIC_6 18
+#define ALGORITHM_RIGHT_SYMMETRIC_6 19
+#define ALGORITHM_PARITY_0_6 20
+#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
+
return __cpu_to_le32(csum);
}
+static char abuf[4096+4096];
+static int aread(int fd, void *buf, int len)
+{
+ /* aligned read.
+ * On devices with a 4K sector size, we need to read
+ * the full sector and copy relevant bits into
+ * the buffer
+ */
+ int bsize;
+ char *b;
+ int n;
+ if (ioctl(fd, BLKSSZGET, &bsize) != 0 ||
+ bsize <= len)
+ return read(fd, buf, len);
+ if (bsize > 4096)
+ return -1;
+ b = (char*)(((long)(abuf+4096))&~4095UL);
+
+ n = read(fd, b, bsize);
+ if (n <= 0)
+ return n;
+ lseek(fd, len - n, 1);
+ if (n > len)
+ n = len;
+ memcpy(buf, b, n);
+ return n;
+}
+
+static int awrite(int fd, void *buf, int len)
+{
+ /* aligned write.
+ * On devices with a 4K sector size, we need to write
+ * the full sector. We pre-read if the sector is larger
+ * than the write.
+ * The address must be sector-aligned.
+ */
+ int bsize;
+ char *b;
+ int n;
+ if (ioctl(fd, BLKSSZGET, &bsize) != 0 ||
+ bsize <= len)
+ return write(fd, buf, len);
+ if (bsize > 4096)
+ return -1;
+ b = (char*)(((long)(abuf+4096))&~4095UL);
+
+ n = read(fd, b, bsize);
+ if (n <= 0)
+ return n;
+ lseek(fd, -n, 1);
+ memcpy(b, buf, len);
+ n = write(fd, b, bsize);
+ if (n <= 0)
+ return n;
+ lseek(fd, len - n, 1);
+ return len;
+}
+
#ifndef MDASSEMBLE
static void examine_super1(struct supertype *st, char *homehost)
{
struct mdp_superblock_1 *sb = st->sb;
time_t atime;
int d;
- int faulty;
+ int role;
int i;
char *c;
int l = homehost ? strlen(homehost) : 0;
default: break;
}
printf("\n");
+#if 0
+ /* This turns out to just be confusing */
printf(" Array Slot : %d (", __le32_to_cpu(sb->dev_number));
for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--)
if (__le16_to_cpu(sb->dev_roles[i-1]) != 0xffff)
else printf("%d", role);
}
printf(")\n");
+#endif
+ printf(" Device Role : ");
+ d = __le32_to_cpu(sb->dev_number);
+ if (d < sb->raid_disks)
+ role = __le16_to_cpu(sb->dev_roles[d]);
+ else
+ role = 0xFFFF;
+ if (role >= 0xFFFE)
+ printf("spare\n");
+ else
+ printf("Active device %d\n", role);
+
printf(" Array State : ");
for (d=0; d<__le32_to_cpu(sb->raid_disks); d++) {
int cnt = 0;
}
}
if (cnt > 1) printf("?");
- else if (cnt == 1 && me) printf("U");
- else if (cnt == 1) printf("u");
- else printf ("_");
+ else if (cnt == 1) printf("A");
+ else printf (".");
}
+#if 0
+ /* This is confusing too */
faulty = 0;
for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
int role = __le16_to_cpu(sb->dev_roles[i]);
faulty++;
}
if (faulty) printf(" %d failed", faulty);
+#endif
+ printf(" ('A' == active, '.' == missing)");
printf("\n");
}
-static void brief_examine_super1(struct supertype *st)
+static void brief_examine_super1(struct supertype *st, int verbose)
{
struct mdp_superblock_1 *sb = st->sb;
int i;
else if (sb->set_name[0])
nm = sb->set_name;
else
- nm = "??";
+ nm = NULL;
- printf("ARRAY /dev/md/%s level=%s ", nm, c?c:"-unknown-");
+ printf("ARRAY%s%s", nm ? " /dev/md/":"", nm);
+ if (verbose && c)
+ printf(" level=%s", c);
sb_offset = __le64_to_cpu(sb->super_offset);
if (sb_offset <= 4)
- printf("metadata=1.1 ");
+ printf(" metadata=1.1 ");
else if (sb_offset <= 8)
- printf("metadata=1.2 ");
+ printf(" metadata=1.2 ");
else
- printf("metadata=1.0 ");
- printf("num-devices=%d UUID=", __le32_to_cpu(sb->raid_disks));
+ printf(" metadata=1.0 ");
+ if (verbose)
+ printf("num-devices=%d ", __le32_to_cpu(sb->raid_disks));
+ printf("UUID=");
for (i=0; i<16; i++) {
if ((i&3)==0 && i != 0) printf(":");
printf("%02x", sb->set_uuid[i]);
}
if (len)
printf("MD_NAME=%.*s\n", len, sb->set_name);
- printf("MD_UUID=");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
- printf("%02x", sb->set_uuid[i]);
- }
- printf("\n");
}
#endif
int role;
info->array.major_version = 1;
- info->array.minor_version = __le32_to_cpu(sb->feature_map);
+ info->array.minor_version = st->minor_version;
info->array.patch_version = 0;
info->array.raid_disks = __le32_to_cpu(sb->raid_disks);
info->array.level = __le32_to_cpu(sb->level);
info->disk.raid_disk = role;
}
info->events = __le64_to_cpu(sb->events);
+ sprintf(info->text_version, "1.%d", st->minor_version);
+ info->safe_mode_delay = 200;
memcpy(info->uuid, sb->set_uuid, 16);
if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
read(rfd, sb->device_uuid, 16) != 16) {
- *(__u32*)(sb->device_uuid) = random();
- *(__u32*)(sb->device_uuid+4) = random();
- *(__u32*)(sb->device_uuid+8) = random();
- *(__u32*)(sb->device_uuid+12) = random();
+ __u32 r[4] = {random(), random(), random(), random()};
+ memcpy(sb->device_uuid, r, 16);
}
sb->dev_roles[i] =
__le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
struct misc_dev_info *misc = (struct misc_dev_info*)
- (st->sb + 1024 + sizeof(struct bitmap_super_s));
+ (st->sb + 1024 + 512);
printf("Size was %llu\n", (unsigned long long)
__le64_to_cpu(sb->data_size));
sb->data_size = __cpu_to_le64(
static int init_super1(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *name, char *homehost, int *uuid)
{
- struct mdp_superblock_1 *sb = malloc(1024 + sizeof(bitmap_super_t) +
- sizeof(struct misc_dev_info));
+ struct mdp_superblock_1 *sb;
int spares;
int rfd;
char defname[10];
+
+ if (posix_memalign((void**)&sb, 512, (1024 + 512 +
+ sizeof(struct misc_dev_info))) != 0) {
+ fprintf(stderr, Name
+ ": %s could not allocate superblock\n", __func__);
+ return 0;
+ }
memset(sb, 0, 1024);
st->sb = sb;
- if (info->major_version == -1) {
+ if (info == NULL) {
/* zeroing superblock */
return 0;
}
else {
if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
read(rfd, sb->set_uuid, 16) != 16) {
- *(__u32*)(sb->set_uuid) = random();
- *(__u32*)(sb->set_uuid+4) = random();
- *(__u32*)(sb->set_uuid+8) = random();
- *(__u32*)(sb->set_uuid+12) = random();
+ __u32 r[4] = {random(), random(), random(), random()};
+ memcpy(sb->set_uuid, r, 16);
}
if (rfd >= 0) close(rfd);
}
return 1;
}
+struct devinfo {
+ int fd;
+ char *devname;
+ mdu_disk_info_t disk;
+ struct devinfo *next;
+};
+#ifndef MDASSEMBLE
/* Add a device to the superblock being created */
-static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk)
+static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
+ int fd, char *devname)
{
struct mdp_superblock_1 *sb = st->sb;
__u16 *rp = sb->dev_roles + dk->number;
+ struct devinfo *di, **dip;
+
if ((dk->state & 6) == 6) /* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
else if ((dk->state & ~2) == 0) /* active or idle -> spare */
*rp = 0xffff;
else
*rp = 0xfffe;
+
if (dk->number >= __le32_to_cpu(sb->max_dev) &&
__le32_to_cpu(sb->max_dev) < 384)
sb->max_dev = __cpu_to_le32(dk->number+1);
+
+ sb->dev_number = __cpu_to_le32(dk->number);
+ sb->sb_csum = calc_sb_1_csum(sb);
+
+ dip = (struct devinfo **)&st->info;
+ while (*dip)
+ dip = &(*dip)->next;
+ di = malloc(sizeof(struct devinfo));
+ di->fd = fd;
+ di->devname = devname;
+ di->disk = *dk;
+ di->next = NULL;
+ *dip = di;
+
+ return 0;
}
+#endif
static void locate_bitmap1(struct supertype *st, int fd);
return 3;
sbsize = sizeof(*sb) + 2 * __le32_to_cpu(sb->max_dev);
+ sbsize = (sbsize+511)&(~511UL);
- if (write(fd, sb, sbsize) != sbsize)
+ if (awrite(fd, sb, sbsize) != sbsize)
return 4;
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
(((char*)sb)+1024);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
locate_bitmap1(st, fd);
- if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
+ if (awrite(fd, bm, sizeof(*bm)) !=
+ sizeof(*bm))
return 5;
}
}
return 4*2;
}
-static int write_init_super1(struct supertype *st,
- mdu_disk_info_t *dinfo, char *devname)
+#ifndef MDASSEMBLE
+static int write_init_super1(struct supertype *st)
{
struct mdp_superblock_1 *sb = st->sb;
struct supertype refst;
- int fd = open(devname, O_RDWR | O_EXCL);
int rfd;
- int rv;
+ int rv = 0;
int bm_space;
-
+ struct devinfo *di;
unsigned long long dsize, array_size;
long long sb_offset;
+ for (di = st->info; di && ! rv ; di = di->next) {
+ if (di->disk.state == 1)
+ continue;
+ if (di->fd < 0)
+ continue;
- if (fd < 0) {
- fprintf(stderr, Name ": Failed to open %s to write superblock\n",
- devname);
- return -1;
- }
+ Kill(di->devname, 0, 1, 1);
+ Kill(di->devname, 0, 1, 1);
- sb->dev_number = __cpu_to_le32(dinfo->number);
- if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY))
- sb->devflags |= __cpu_to_le32(WriteMostly1);
+ sb->dev_number = __cpu_to_le32(di->disk.number);
+ if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
+ sb->devflags |= __cpu_to_le32(WriteMostly1);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
-
- if (rfd >= 0) close(rfd);
- sb->events = 0;
-
- refst =*st;
- refst.sb = NULL;
- if (load_super1(&refst, fd, NULL)==0) {
- struct mdp_superblock_1 *refsb = refst.sb;
-
- memcpy(sb->device_uuid, refsb->device_uuid, 16);
- if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
- /* same array, so preserve events and dev_number */
- sb->events = refsb->events;
- /* bugs in 2.6.17 and earlier mean the dev_number
- * chosen in Manage must be preserved
- */
- if (get_linux_version() >= 2006018)
- sb->dev_number = refsb->dev_number;
+ if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+ read(rfd, sb->device_uuid, 16) != 16) {
- *(__u32*)(sb->device_uuid) = random();
- *(__u32*)(sb->device_uuid+4) = random();
- *(__u32*)(sb->device_uuid+8) = random();
- *(__u32*)(sb->device_uuid+12) = random();
++ __u32 r[4] = {random(), random(), random(), random()};
++ memcpy(sb->device_uuid, r, 16);
+ }
- if (rfd >= 0) close(rfd);
++ if (rfd >= 0)
++ close(rfd);
++
+ sb->events = 0;
+
+ refst =*st;
+ refst.sb = NULL;
+ if (load_super1(&refst, di->fd, NULL)==0) {
+ struct mdp_superblock_1 *refsb = refst.sb;
+
+ memcpy(sb->device_uuid, refsb->device_uuid, 16);
+ if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
+ /* same array, so preserve events and
+ * dev_number */
+ sb->events = refsb->events;
+ /* bugs in 2.6.17 and earlier mean the
+ * dev_number chosen in Manage must be preserved
+ */
+ if (get_linux_version() >= 2006018)
+ sb->dev_number = refsb->dev_number;
+ }
+ free(refsb);
}
- free(refsb);
- }
- if (!get_dev_size(fd, NULL, &dsize))
- return 1;
- dsize >>= 9;
+ if (!get_dev_size(di->fd, NULL, &dsize))
+ return 1;
+ dsize >>= 9;
- if (dsize < 24) {
- close(fd);
- return 2;
- }
+ if (dsize < 24) {
+ close(di->fd);
+ return 2;
+ }
- /*
- * Calculate the position of the superblock.
- * It is always aligned to a 4K boundary and
- * depending on minor_version, it can be:
- * 0: At least 8K, but less than 12K, from end of device
- * 1: At start of device
- * 2: 4K from start of device.
- * Depending on the array size, we might leave extra space
- * for a bitmap.
- */
- array_size = __le64_to_cpu(sb->size);
- /* work out how much space we left for a bitmap */
- bm_space = choose_bm_space(array_size);
-
- switch(st->minor_version) {
- case 0:
- sb_offset = dsize;
- sb_offset -= 8*2;
- sb_offset &= ~(4*2-1);
- sb->super_offset = __cpu_to_le64(sb_offset);
- sb->data_offset = __cpu_to_le64(0);
+ /*
+ * Calculate the position of the superblock.
+ * It is always aligned to a 4K boundary and
+ * depending on minor_version, it can be:
+ * 0: At least 8K, but less than 12K, from end of device
+ * 1: At start of device
+ * 2: 4K from start of device.
+ * Depending on the array size, we might leave extra space
+ * for a bitmap.
+ */
+ array_size = __le64_to_cpu(sb->size);
+ /* work out how much space we left for a bitmap */
+ bm_space = choose_bm_space(array_size);
+
+ switch(st->minor_version) {
+ case 0:
+ sb_offset = dsize;
+ sb_offset -= 8*2;
+ sb_offset &= ~(4*2-1);
+ sb->super_offset = __cpu_to_le64(sb_offset);
+ sb->data_offset = __cpu_to_le64(0);
if (sb_offset - bm_space < array_size)
bm_space = sb_offset - array_size;
- sb->data_size = __cpu_to_le64(sb_offset - bm_space);
- break;
- case 1:
- sb->super_offset = __cpu_to_le64(0);
- if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
- bm_space = dsize - __le64_to_cpu(sb->size) - 4*2;
- sb->data_offset = __cpu_to_le64(bm_space + 4*2);
- sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
- break;
- case 2:
- sb_offset = 4*2;
- sb->super_offset = __cpu_to_le64(4*2);
- if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
- bm_space = dsize - __le64_to_cpu(sb->size) - 4*2 - 4*2;
- sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
- sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 - bm_space );
- break;
- default:
- return -EINVAL;
- }
+ sb->data_size = __cpu_to_le64(sb_offset - bm_space);
+ break;
+ case 1:
+ sb->super_offset = __cpu_to_le64(0);
+ if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
+ bm_space = dsize - __le64_to_cpu(sb->size) -4*2;
+ sb->data_offset = __cpu_to_le64(bm_space + 4*2);
+ sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
+ break;
+ case 2:
+ sb_offset = 4*2;
+ sb->super_offset = __cpu_to_le64(4*2);
+ if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size)
+ > dsize)
+ bm_space = dsize - __le64_to_cpu(sb->size)
+ - 4*2 - 4*2;
+ sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
+ sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2
+ - bm_space );
+ break;
+ default:
+ return -EINVAL;
+ }
- sb->sb_csum = calc_sb_1_csum(sb);
- rv = store_super1(st, fd);
- if (rv)
- fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
+ sb->sb_csum = calc_sb_1_csum(sb);
+ rv = store_super1(st, di->fd);
+ if (rv)
+ fprintf(stderr,
+ Name ": failed to write superblock to %s\n",
+ di->devname);
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
- rv = st->ss->write_bitmap(st, fd);
- close(fd);
+ if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ rv = st->ss->write_bitmap(st, di->fd);
+ close(di->fd);
+ di->fd = -1;
+ }
return rv;
}
+#endif
static int compare_super1(struct supertype *st, struct supertype *tst)
{
return 1;
if (!first) {
- first = malloc(1024+sizeof(bitmap_super_t) +
- sizeof(struct misc_dev_info));
- memcpy(first, second, 1024+sizeof(bitmap_super_t) +
+ if (posix_memalign((void**)&first, 512,
+ 1024 + 512 +
+ sizeof(struct misc_dev_info)) != 0) {
+ fprintf(stderr, Name
+ ": %s could not allocate superblock\n", __func__);
+ return 1;
+ }
+ memcpy(first, second, 1024 + 512 +
sizeof(struct misc_dev_info));
st->sb = first;
return 0;
free_super1(st);
+ if (st->subarray[0])
+ return 1;
+
if (st->ss == NULL || st->minor_version == -1) {
int bestvers = -1;
struct supertype tst;
__u64 bestctime = 0;
/* guess... choose latest ctime */
+ memset(&tst, 0, sizeof(tst));
tst.ss = &super1;
- tst.sb = NULL;
for (tst.minor_version = 0; tst.minor_version <= 2 ; tst.minor_version++) {
switch(load_super1(&tst, fd, devname)) {
case 0: super = tst.sb;
return 1;
}
- super = malloc(1024 + sizeof(bitmap_super_t) +
- sizeof(struct misc_dev_info));
+ if (posix_memalign((void**)&super, 512,
+ 1024 + 512 +
+ sizeof(struct misc_dev_info)) != 0) {
+ fprintf(stderr, Name ": %s could not allocate superblock\n",
+ __func__);
+ return 1;
+ }
- if (read(fd, super, 1024) != 1024) {
+ if (aread(fd, super, 1024) != 1024) {
if (devname)
fprintf(stderr, Name ": Cannot read superblock on %s\n",
devname);
bsb = (struct bitmap_super_s *)(((char*)super)+1024);
- misc = (struct misc_dev_info*) (bsb+1);
+ misc = (struct misc_dev_info*) (((char*)super)+1024+512);
misc->device_size = dsize;
/* Now check on the bitmap superblock */
* should get that written out.
*/
locate_bitmap1(st, fd);
- if (read(fd, ((char*)super)+1024, sizeof(struct bitmap_super_s))
- != sizeof(struct bitmap_super_s))
+ if (aread(fd, ((char*)super)+1024, 512)
+ != 512)
goto no_bitmap;
uuid_from_super1(st, uuid);
struct supertype *st = malloc(sizeof(*st));
if (!st) return st;
+ memset(st, 0, sizeof(*st));
st->ss = &super1;
st->max_devs = 384;
st->sb = NULL;
- /* Eliminate pointless leading 0 from some versions of mdadm -D */
- if (strncmp(arg, "01.", 3) == 0)
+ /* leading zeros can be safely ignored. --detail generates them. */
+ while (*arg == '0')
arg++;
- if (strcmp(arg, "1.0") == 0) {
+ if (strcmp(arg, "1.0") == 0 ||
+ strcmp(arg, "1.00") == 0) {
st->minor_version = 0;
return st;
}
- if (strcmp(arg, "1.1") == 0) {
+ if (strcmp(arg, "1.1") == 0 ||
+ strcmp(arg, "1.01") == 0) {
st->minor_version = 1;
return st;
}
- if (strcmp(arg, "1.2") == 0) {
+ if (strcmp(arg, "1.2") == 0 ||
+ strcmp(arg, "1.02") == 0) {
st->minor_version = 2;
return st;
}
if (strcmp(arg, "1") == 0 ||
- strcmp(arg, "default/large") == 0) {
+ strcmp(arg, "default") == 0) {
st->minor_version = -1;
return st;
}
int rv = 0;
int towrite, n;
- char buf[4096];
+ char *buf = (char*)(((long)(abuf+4096))&~4095UL);
locate_bitmap1(st, fd);
- if (write(fd, ((char*)sb)+1024, sizeof(bitmap_super_t)) !=
- sizeof(bitmap_super_t))
- return -2;
+ memset(buf, 0xff, 4096);
+ memcpy(buf, ((char*)sb)+1024, sizeof(bitmap_super_t));
+
towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
towrite = (towrite+7) >> 3; /* bits to bytes */
- memset(buf, 0xff, sizeof(buf));
+ towrite += sizeof(bitmap_super_t);
+ towrite = ROUND_UP(towrite, 512);
while (towrite > 0) {
n = towrite;
- if (n > sizeof(buf))
- n = sizeof(buf);
+ if (n > 4096)
+ n = 4096;
n = write(fd, buf, n);
if (n > 0)
towrite -= n;
else
break;
+ memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)
st->sb = NULL;
}
+#ifndef MDASSEMBLE
+static int validate_geometry1(struct supertype *st, int level,
+ int layout, int raiddisks,
+ int chunk, unsigned long long size,
+ char *subdev, unsigned long long *freesize,
+ int verbose)
+{
+ unsigned long long ldsize;
+ int fd;
+
+ if (level == LEVEL_CONTAINER)
+ return 0;
+ if (!subdev)
+ return 1;
+
+ fd = open(subdev, O_RDONLY|O_EXCL, 0);
+ if (fd < 0) {
+ if (verbose)
+ fprintf(stderr, Name ": super1.x cannot open %s: %s\n",
+ subdev, strerror(errno));
+ return 0;
+ }
+
+ if (!get_dev_size(fd, subdev, &ldsize)) {
+ close(fd);
+ return 0;
+ }
+ close(fd);
+
+ *freesize = avail_size1(st, ldsize >> 9);
+ return 1;
+}
+#endif /* MDASSEMBLE */
+
struct superswitch super1 = {
#ifndef MDASSEMBLE
.examine_super = examine_super1,
.detail_super = detail_super1,
.brief_detail_super = brief_detail_super1,
.export_detail_super = export_detail_super1,
+ .write_init_super = write_init_super1,
+ .validate_geometry = validate_geometry1,
+ .add_to_super = add_to_super1,
#endif
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
.getinfo_super = getinfo_super1,
.update_super = update_super1,
.init_super = init_super1,
- .add_to_super = add_to_super1,
.store_super = store_super1,
- .write_init_super = write_init_super1,
.compare_super = compare_super1,
.load_super = load_super1,
.match_metadata_desc = match_metadata_desc1,
.locate_bitmap = locate_bitmap1,
.write_bitmap = write_bitmap1,
.free_super = free_super1,
- .major = 1,
#if __BYTE_ORDER == BIG_ENDIAN
.swapuuid = 0,
#else
.swapuuid = 1,
#endif
+ .name = "1.x",
};