From b5e64645037e99b5f05c9499b27b422ae60d23a9 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 1 Nov 2004 04:49:34 +0000 Subject: [PATCH] mdadm-1.8.0 --- ANNOUNCE-1.8.0 | 37 +++++++ Build.c | 32 +++++- ChangeLog | 12 ++- Create.c | 51 +++++++--- Examine.c | 3 +- Makefile | 41 +++++--- Manage.c | 17 ++++ Monitor.c | 17 +++- ReadMe.c | 28 +++++- inventory | 4 + md.4 | 27 +++++ mdadm.8 | 53 +++++++++- mdadm.c | 240 +++++++++++---------------------------------- mdadm.h | 34 ++++++- mdadm.spec | 2 +- mdassemble.c | 10 +- mdopen.c | 193 ++++++++++++++++++++++++++++++++++++ misc/syslog-events | 27 +++++ util.c | 35 +++++-- 19 files changed, 631 insertions(+), 232 deletions(-) create mode 100644 ANNOUNCE-1.8.0 create mode 100644 mdopen.c create mode 100644 misc/syslog-events diff --git a/ANNOUNCE-1.8.0 b/ANNOUNCE-1.8.0 new file mode 100644 index 00000000..f4f36d7b --- /dev/null +++ b/ANNOUNCE-1.8.0 @@ -0,0 +1,37 @@ +Subject: ANNOUNCE: mdadm 1.8.0 - A tool for managing Soft RAID under Linux + + +I am pleased to announce the availability of + mdadm version 1.8.0 +It is available at + http://www.cse.unsw.edu.au/~neilb/source/mdadm/ +and + http://www.{countrycode}.kernel.org/pub/linux/utils/raid/mdadm/ + +as a source tar-ball and (at the first site) as an SRPM, and as an RPM for i386. + +mdadm is a tool for creating, managing and monitoring +device arrays using the "md" driver in Linux, also +known as Software RAID arrays. + +Release 1.8.0 adds: + - --pid-file option to declare a file to record the pid in for + --monitor --daemonise + - Support for new "faulty" personality (see md.4) - not submitted to + kernel.org yet. + - support for raid0 and linear over devices larger than 2 Terabytes. + - assorted bug fixes. + +It is hoped that the next full release of mdadm will be 2.0.0 +and it will have substantially re-written handling for superblocks and +array creation. In particular, it will be able to work with the new +superblock format (version 1) supported by 2.6. 
+1.8.1 may be released soon which contains much of this functionality. + +Development of mdadm is sponsored by CSE@UNSW: + The School of Computer Science and Engineering +at + The University of New South Wales + +NeilBrown 01 November 2004 + diff --git a/Build.c b/Build.c index 3e182f8c..bf975337 100644 --- a/Build.c +++ b/Build.c @@ -33,7 +33,7 @@ #define START_MD _IO (MD_MAJOR, 2) #define STOP_MD _IO (MD_MAJOR, 3) -int Build(char *mddev, int mdfd, int chunk, int level, +int Build(char *mddev, int mdfd, int chunk, int level, int layout, int raiddisks, mddev_dev_t devlist, int assume_clean) { @@ -50,6 +50,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, * SET_ARRAY_INFO, ADD_NEW_DISK, RUN_ARRAY * */ + int verbose = 0; int i; int vers; struct stat stb; @@ -77,6 +78,34 @@ int Build(char *mddev, int mdfd, int chunk, int level, return 1; } + if (layout == UnSet) + switch(level) { + default: /* no layout */ + layout = 0; + break; + case 10: + layout = 0x102; /* near=2, far=1 */ + if (verbose) + fprintf(stderr, + Name ": layout defaults to n1\n"); + break; + case 5: + case 6: + layout = map_name(r5layout, "default"); + if (verbose) + fprintf(stderr, + Name ": layout defaults to %s\n", map_num(r5layout, layout)); + break; + case LEVEL_FAULTY: + layout = map_name(faultylayout, "default"); + + if (verbose) + fprintf(stderr, + Name ": layout defaults to %s\n", map_num(faultylayout, layout)); + break; + } + + vers = md_get_version(mdfd); /* looks Ok, go for it */ @@ -100,6 +129,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, if (chunk == 0) chunk = 64; array.chunk_size = chunk*1024; + array.layout = layout; if (ioctl(mdfd, SET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n", mddev, strerror(errno)); diff --git a/ChangeLog b/ChangeLog index df5b25b1..6e0ca01c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,14 @@ -Changes Prior to this release +Changes Prior to 1.8.0 release + - Makefile cleanup from Luca Berra + - 
--pid-file (-i) to set a pid file to use with --monitor --daemonise + - Fix typo in mdadm man page + - Fix coredump when "-s" used with no config file present. + - Support new "faulty" personality which can inject synthetic + faults. (Not in kernel.org yet at 1Nov2004) + - Support raid0/linear on devices > 2 Terabytes + - Make sure raid6 resyncs when created with one missing device + +Changes Prior to 1.7.0 release - Support "--grow --add" to add a device to a linear array, if the kernel supports it. Not documented yet. - Restore support for uclibc which was broken recently. diff --git a/Create.c b/Create.c index 64bf0e2f..7b4988e6 100644 --- a/Create.c +++ b/Create.c @@ -52,7 +52,7 @@ int Create(char *mddev, int mdfd, * if runstop==run, or raiddisks diskswere used, * RUN_ARRAY */ - unsigned long minsize=0, maxsize=0; + unsigned long long minsize=0, maxsize=0; char *mindisc = NULL; char *maxdisc = NULL; int dnum; @@ -130,6 +130,13 @@ int Create(char *mddev, int mdfd, fprintf(stderr, Name ": layout defaults to %s\n", map_num(r5layout, layout)); break; + case LEVEL_FAULTY: + layout = map_name(faultylayout, "default"); + + if (verbose) + fprintf(stderr, + Name ": layout defaults to %s\n", map_num(faultylayout, layout)); + break; } if (level == 10) @@ -168,7 +175,8 @@ int Create(char *mddev, int mdfd, dnum = 0; for (dv=devlist; dv; dv=dv->next, dnum++) { char *dname = dv->devname; - unsigned long dsize, freesize; + unsigned long dsize; + unsigned long long ldsize, freesize; int fd; if (strcasecmp(dname, "missing")==0) { if (first_missing > dnum) @@ -186,6 +194,11 @@ int Create(char *mddev, int mdfd, fail=1; continue; } +#ifdef BLKGETSIZE64 + if (ioctl(fd, BLKGETSIZE64, &ldsize)==0) + ; + else +#endif if (ioctl(fd, BLKGETSIZE, &dsize)) { fprintf(stderr, Name ": Cannot get size of %s: %s\n", dname, strerror(errno)); @@ -193,19 +206,23 @@ int Create(char *mddev, int mdfd, close(fd); continue; } - if (dsize < MD_RESERVED_SECTORS*2) { + else { + ldsize = dsize; + dsize <<= 
9; + } + if (ldsize < MD_RESERVED_SECTORS*2LL*512LL) { fprintf(stderr, Name ": %s is too small: %luK\n", - dname, dsize/2); + dname, (unsigned long)(ldsize>>10)); fail = 1; close(fd); continue; } - freesize = MD_NEW_SIZE_SECTORS(dsize); + freesize = MD_NEW_SIZE_SECTORS((ldsize>>9)); freesize /= 2; if (size && freesize < size) { fprintf(stderr, Name ": %s is smaller that given size." - " %luK < %luK + superblock\n", dname, freesize, size); + " %lluK < %luK + superblock\n", dname, freesize, size); fail = 1; close(fd); continue; @@ -232,11 +249,18 @@ int Create(char *mddev, int mdfd, fprintf(stderr, Name ": no size and no drives given - aborting create.\n"); return 1; } - size = minsize; - if (verbose && level>0) - fprintf(stderr, Name ": size set to %luK\n", size); + if (level > 0) { + /* size is meaningful */ + if (minsize > 0x100000000ULL) { + fprintf(stderr, Name ": devices too large for RAID level %d\n", level); + return 1; + } + size = minsize; + if (verbose) + fprintf(stderr, Name ": size set to %luK\n", size); + } } - if (level >= 1 && ((maxsize-size)*100 > maxsize)) { + if (level > 0 && ((maxsize-size)*100 > maxsize)) { fprintf(stderr, Name ": largest drive (%s) exceed size (%luK) by more than 1%%\n", maxdisc, size); warn = 1; @@ -284,8 +308,11 @@ int Create(char *mddev, int mdfd, array.md_minor = MINOR(stb.st_rdev); array.not_persistent = 0; /*** FIX: Need to do something about RAID-6 here ***/ - if ( (level == 5 || level == 6) && - (insert_point < raiddisks || first_missing < raiddisks) ) + if ( ( (level == 5) && + (insert_point < raiddisks || first_missing < raiddisks) ) + || + ( level == 6 && missing_disks == 2) + ) array.state = 1; /* clean, but one+ drive will be missing */ else array.state = 0; /* not clean, but no errors */ diff --git a/Examine.c b/Examine.c index 7dec8d87..749bc7c1 100644 --- a/Examine.c +++ b/Examine.c @@ -145,7 +145,8 @@ int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust) printf(" Creation Time : %.24s\n", 
ctime(&atime)); c=map_num(pers, super.level); printf(" Raid Level : %s\n", c?c:"-unknown-"); - printf(" Device Size : %d%s\n", super.size, human_size((long long)super.size<<10)); + if (super.level <= 0) + printf(" Device Size : %u%s\n", super.size, human_size((long long)super.size<<10)); printf(" Raid Devices : %d\n", super.raid_disks); printf(" Total Devices : %d\n", super.nr_disks); printf("Preferred Minor : %d\n", super.md_minor); diff --git a/Makefile b/Makefile index a35c6f7a..8f0547b9 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,11 @@ # e.g. make CXFLAGS=-O to optimise TCC = tcc UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found ) +DIET_GCC = diet gcc + +KLIBC=/home/src/klibc/klibc-0.77 + +KLIBC_GCC = gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 CC = gcc CXFLAGS = -ggdb @@ -52,11 +57,15 @@ MAN4DIR = $(MANDIR)/man4 MAN5DIR = $(MANDIR)/man5 MAN8DIR = $(MANDIR)/man8 - -KLIBC=/home/src/klibc/klibc-0.77 - -OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o -SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c +OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \ + Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o mdopen.o +SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \ + Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c mdopen.c +ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c +ifdef MDASSEMBLE_AUTO +ASSEMBLE_SRCS += mdopen.c mdstat.c +ASSEMBLE_FLAGS = -DMDASSEMBLE_AUTO +endif all : mdadm mdadm.man md.man mdadm.conf.man @@ -66,7 +75,7 @@ mdadm : $(OBJS) $(CC) 
$(LDFLAGS) -o mdadm $^ mdadm.static : $(OBJS) - $(CC) $(LDFLAGS) --static -o mdadm.static $^ + $(CC) $(LDFLAGS) -static -o mdadm.static $^ mdadm.tcc : $(SRCS) mdadm.h $(TCC) -o mdadm.tcc $(SRCS) @@ -78,15 +87,22 @@ mdadm.klibc : $(SRCS) mdadm.h rm -f $(OBJS) gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS) -mdassemble : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h +mdassemble : $(ASSEMBLE_SRCS) mdadm.h rm -f $(OBJS) - diet gcc -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c + $(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) -# This doesn't work -mdassemble.klibc : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h +mdassemble.static : $(ASSEMBLE_SRCS) mdadm.h + rm -f $(OBJS) + $(CC) $(LDFLAGS) $(ASSEMBLE_FLAGS) -static -o mdassemble $(ASSEMBLE_SRCS) + +mdassemble.uclibc : $(ASSEMBLE_SRCS) mdadm.h rm -f $(OBJS) - gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c + $(UCLIBC_GCC) $(ASSEMBLE_FLAGS) -DUCLIBC -static -o mdassemble.uclibc $(ASSEMBLE_SRCS) +# This doesn't work +mdassemble.klibc : $(ASSEMBLE_SRCS) mdadm.h + rm -f $(OBJS) + $(KLIBC_GCC) $(CFLAGS) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) mdadm.man : mdadm.8 nroff -man mdadm.8 > mdadm.man @@ -106,7 +122,8 @@ install : mdadm mdadm.8 md.4 mdadm.conf.5 $(INSTALL) -D -m 644 mdadm.conf.5 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 clean : - rm -f mdadm $(OBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt + rm -f mdadm $(OBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \ + mdassemble mdassemble.static mdassemble.uclibc mdassemble.klibc dist : clean ./makedist diff --git a/Manage.c b/Manage.c index 624c775d..56cc1ad7 100644 --- 
a/Manage.c +++ b/Manage.c @@ -134,6 +134,23 @@ int Manage_resize(char *devname, int fd, long long size, int raid_disks) return 0; } +int Manage_reconfig(char *devname, int fd, int layout) +{ + mdu_array_info_t info; + if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) { + fprintf(stderr, Name ": Cannot get array information for %s: %s\n", + devname, strerror(errno)); + return 1; + } + info.layout = layout; + printf("layout set to %d\n", info.layout); + if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) { + fprintf(stderr, Name ": Cannot set layout for %s: %s\n", + devname, strerror(errno)); + return 1; + } + return 0; +} int Manage_subdevs(char *devname, int fd, mddev_dev_t devlist) diff --git a/Monitor.c b/Monitor.c index ce0087cf..036c47f7 100644 --- a/Monitor.c +++ b/Monitor.c @@ -47,7 +47,7 @@ static char *percentalerts[] = { int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config, int test) + char *config, int test, char* pidfile) { /* * Every few seconds, scan every md device looking for changes @@ -127,7 +127,18 @@ int Monitor(mddev_dev_t devlist, if (daemonise) { int pid = fork(); if (pid > 0) { - printf("%d\n", pid); + if (!pidfile) + printf("%d\n", pid); + else { + FILE *pid_file; + pid_file=fopen(pidfile, "w"); + if (!pid_file) + perror("cannot create pid file"); + else { + fprintf(pid_file,"%d\n", pid); + fclose(pid_file); + } + } return 0; } if (pid < 0) { @@ -428,6 +439,8 @@ int Monitor(mddev_dev_t devlist, } test = 0; } + if (pidfile) + unlink(pidfile); return 0; } diff --git a/ReadMe.c b/ReadMe.c index 21a6543a..687752bc 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -29,7 +29,7 @@ #include "mdadm.h" -char Version[] = Name " - v1.7.0 - 11 August 2004\n"; +char Version[] = Name " - v1.8.0 - 01 November 2004\n"; /* * File: ReadMe.c * @@ -90,7 +90,7 @@ char Version[] = Name " - v1.7.0 - 11 August 2004\n"; * At the time if writing, there is only minimal support. 
*/ -char short_options[]="-ABCDEFGQhVvbc:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1t"; +char short_options[]="-ABCDEFGQhVvbc:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1t"; struct option long_options[] = { {"manage", 0, 0, '@'}, {"misc", 0, 0, '#'}, @@ -157,6 +157,7 @@ struct option long_options[] = { {"daemonise", 0, 0, 'f'}, {"daemonize", 0, 0, 'f'}, {"oneshot", 0, 0, '1'}, + {"pid-file", 1, 0, 'i'}, {0, 0, 0, 0} }; @@ -418,6 +419,7 @@ char Help_monitor[] = " --config= -c : specify a different config file\n" " --scan -s : find mail-address/program in config file\n" " --daemonise -f : Fork and continue in child, parent exits\n" +" --pid-file= -i : In daemon mode write pid to specified file instead of stdout\n" " --oneshot -1 : Check for degraded arrays, then exit\n" " --test -t : Generate a TestMessage event against each array at startup\n" ; @@ -510,6 +512,7 @@ mapping_t pers[] = { { "6", 6}, { "raid10", 10}, { "10", 10}, + { "faulty", -5}, { NULL, 0} }; @@ -523,3 +526,24 @@ mapping_t modes[] = { { "monitor", MONITOR}, { "grow", GROW}, }; + +mapping_t faultylayout[] = { + { "write-transient", WriteTransient }, + { "wt", WriteTransient }, + { "read-transient", ReadTransient }, + { "rt", ReadTransient }, + { "write-persistent", WritePersistent }, + { "wp", WritePersistent }, + { "read-persistent", ReadPersistent }, + { "rp", ReadPersistent }, + { "write-all", WriteAll }, + { "wa", WriteAll }, + { "read-fixable", ReadFixable }, + { "rf", ReadFixable }, + + { "clear", ClearErrors}, + { "flush", ClearFaults}, + { "none", ClearErrors}, + { "default", ClearErrors}, + { NULL, 0} +}; diff --git a/inventory b/inventory index fac793b8..0fe1afcd 100644 --- a/inventory +++ b/inventory @@ -7,6 +7,7 @@ ANNOUNCE-1.4.0 ANNOUNCE-1.5.0 ANNOUNCE-1.6.0 ANNOUNCE-1.7.0 +ANNOUNCE-1.8.0 Assemble.c Build.c COPYING @@ -38,6 +39,9 @@ mdadm.conf.5 mdadm.h mdadm.spec mdassemble.c +mdopen.c mdstat.c +misc/ +misc/syslog-events raid5extend.c util.c diff --git a/md.4 b/md.4 index cb8027a7..4643dd25 100644 --- a/md.4 
+++ b/md.4 @@ -166,6 +166,33 @@ real device. If one of these interfaces fails (e.g. due to cable problems), the multipath driver to attempt to redirect requests to another interface. +.SS FAULTY +The FAULTY md module is provided for testing purposes. A faulty array +has exactly one component device and is normally assembled without a +superblock, so the md array created provides direct access to all of +the data in the component device. + +The FAULTY module may be requested to simulate faults to allow testing +of other md levels or of filesystems. Faults can be chosen to trigger +on read requests or write requests, and can be transient (a subsequent +read/write at the address will probably succeed) or persistent +(subsequent read/write of the same address will fail). Further, read +faults can be "fixable" meaning that they persist until a write +request at the same address. + +Fault types can be requested with a period. In this case the fault +will recur repeatedly after the given number of requests of the +relevant type. For example if persistent read faults have a period of +100, then every 100th read request would generate a fault, and the +faulty sector would be recorded so that subsequent reads on that +sector would also fail. + +There is a limit to the number of faulty sectors that are remembered. +Faults generated after this limit is exhausted are treated as +transient. + +The list of faulty sectors can be flushed, and the active list of +failure modes can be cleared. .SS UNCLEAN SHUTDOWN diff --git a/mdadm.8 b/mdadm.8 index b9f5bfae..44318e1c 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.TH MDADM 8 "" v1.7.0 +.TH MDADM 8 "" v1.8.0 .SH NAME mdadm \- manage MD devices .I aka @@ -30,14 +30,17 @@ md devices, .BR RAID4 , .BR RAID5 , .BR RAID6 , +.BR MULTIPATH , and -.BR MULTIPATH . +.BR FAULTY . .B MULTIPATH is not a Software RAID mechanism, but does involve multiple devices. 
For .B MULTIPATH each device is a path to one common physical storage device. +.B FAULTY is also not true RAID, and it only involves one device. It +provides a layer over a true device that can be used to inject faults. .B mdadm is a program that can be used to create, manage, and monitor @@ -246,7 +249,7 @@ Specify rounding factor for linear array (==chunk size) Set raid level. When used with .IR --create , options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid4, 4, -raid5, 5, raid6, 6, multipath, mp. Obviously some of these are synonymous. +raid5, 5, raid6, 6, multipath, mp, faulty. Obviously some of these are synonymous. When used with .IR --build , @@ -261,6 +264,39 @@ right-asymmetric, right-symmetric, la, ra, ls, rs. The default is left-symmetric. +This option is also used to set the failure mode for +.IR faulty . +The options are: +write-transient, +wt, +read-transient, +rt, +write-persistent, +wp, +read-persistent, +rp, +write-all, +read-fixable, +rf, +clear, +flush, +none. + +Each mode can be followed by a number which is used as a period +between fault generation. Without a number, the fault is generated +once on the first relevant request. With a number, the fault will be +generated after that many requests, and will continue to be generated +every time the period elapses. + +Multiple failure modes can be current simultaneously by using the +"--grow" option to set subsequent failure modes. + +"clear" or "none" will remove any pending or periodic failure modes, +and "flush" will clear any persistent faults. + +To set the parity with "--grow", the level of the array ("faulty") +must be specified before the fault mode is specified. + .TP .BR --layout= same as --parity @@ -539,6 +575,13 @@ This is useful with which will only continue monitoring if a mail address or alert program is found in the config file. 
+.TP +.BR -i ", " --pid-file +When +.B mdadm +is running in daemon mode, write the pid of the daemon process to +the specified file, instead of printing it on standard output. + .TP .BR -1 ", " --oneshot Check arrays only once. This will generate @@ -741,7 +784,7 @@ will automatically create a degraded array with an extra spare drive. This is because building the spare into a degraded array is in general faster than resyncing the parity on a non-degraded, but not clean, array. This feature can be over-ridden with the --I --force +.I --force option. '''If the @@ -793,7 +836,7 @@ Usage: .I devices ... .PP -MISC mode includes a number if distinct operations that +MISC mode includes a number of distinct operations that operate on distinct devices. The operations are: .TP --query diff --git a/mdadm.c b/mdadm.c index e1816737..d40008ea 100644 --- a/mdadm.c +++ b/mdadm.c @@ -32,167 +32,6 @@ #include -void make_parts(char *dev, int cnt) -{ - /* make 'cnt' partition devices for 'dev' - * We use the major/minor from dev and add 1..cnt - * If dev ends with a digit, we add "_p%d" else "%d" - * If the name exists, we use it's owner/mode, - * else that of dev - */ - struct stat stb; - int major, minor; - int i; - char *name = malloc(strlen(dev) + 20); - int dig = isdigit(dev[strlen(dev)-1]); - - if (stat(dev, &stb)!= 0) - return; - if (!S_ISBLK(stb.st_mode)) - return; - major = MAJOR(stb.st_rdev); - minor = MINOR(stb.st_rdev); - for (i=1; i <= cnt ; i++) { - struct stat stb2; - sprintf(name, "%s%s%d", dev, dig?"_p":"", i); - if (stat(name, &stb2)==0) { - if (!S_ISBLK(stb2.st_mode)) - continue; - if (stb2.st_rdev == MKDEV(major, minor+i)) - continue; - unlink(name); - } else { - stb2 = stb; - } - mknod(name, S_IFBLK | 0600, MKDEV(major, minor+i)); - chown(name, stb2.st_uid, stb2.st_gid); - chmod(name, stb2.st_mode & 07777); - } -} - -/* - * Open a given md device, and check that it really is one. - * If 'autof' is given, then we need to create, or recreate, the md device. 
- * If the name already exists, and is not a block device, we fail. - * If it exists and is not an md device, is not the right type (partitioned or not), - * or is currently in-use, we remove the device, but remember the owner and mode. - * If it now doesn't exist, we find a few md array and create the device. - * Default ownership is user=0, group=0 perm=0600 - */ -int open_mddev(char *dev, int autof) -{ - int mdfd; - struct stat stb; - int major = MD_MAJOR; - int minor; - int must_remove = 0; - struct mdstat_ent *mdlist; - int num; - - if (autof) { - /* autof is set, so we need to check that the name is ok, - * and possibly create one if not - */ - stb.st_mode = 0; - if (lstat(dev, &stb)==0 && ! S_ISBLK(stb.st_mode)) { - fprintf(stderr, Name ": %s is not a block device.\n", - dev); - return -1; - } - /* check major number is correct */ - if (autof>0) - major = get_mdp_major(); - if (stb.st_mode && MAJOR(stb.st_rdev) != major) - must_remove = 1; - if (stb.st_mode && !must_remove) { - mdu_array_info_t array; - /* looks ok, see if it is available */ - mdfd = open(dev, O_RDWR, 0); - if (mdfd < 0) { - fprintf(stderr, Name ": error opening %s: %s\n", - dev, strerror(errno)); - return -1; - } else if (md_get_version(mdfd) <= 0) { - fprintf(stderr, Name ": %s does not appear to be an md device\n", - dev); - close(mdfd); - return -1; - } - if (ioctl(mdfd, GET_ARRAY_INFO, &array)==0) { - /* already active */ - must_remove = 1; - close(mdfd); - } else { - if (autof > 0) - make_parts(dev, autof); - return mdfd; - } - } - /* Ok, need to find a minor that is not in use. - * Easiest to read /proc/mdstat, and hunt through for - * an unused number - */ - mdlist = mdstat_read(0); - for (num= (autof>0)?-1:0 ; ; num+= (autof>2)?-1:1) { - struct mdstat_ent *me; - for (me=mdlist; me; me=me->next) - if (me->devnum == num) - break; - if (!me) { - /* doesn't exist if mdstat. 
- * make sure it is new to /dev too - */ - char *dn; - if (autof > 0) - minor = (-1-num) << MdpMinorShift; - else - minor = num; - dn = map_dev(major,minor); - if (dn==NULL || is_standard(dn)) { - /* this number only used by a 'standard' name, - * so it is safe to use - */ - break; - } - } - } - /* 'num' is the number to use, >=0 for md, <0 for mdp */ - if (must_remove) { - /* never remove a device name that ends /mdNN or /dNN, - * that would be confusing - */ - if (is_standard(dev)) { - fprintf(stderr, Name ": --auto refusing to remove %s as it looks like a standard name.\n", - dev); - return -1; - } - unlink(dev); - } - - if (mknod(dev, S_IFBLK|0600, MKDEV(major, minor))!= 0) { - fprintf(stderr, Name ": failed to create %s\n", dev); - return -1; - } - if (must_remove) { - chown(dev, stb.st_uid, stb.st_gid); - chmod(dev, stb.st_mode & 07777); - } - make_parts(dev,autof); - } - mdfd = open(dev, O_RDWR, 0); - if (mdfd < 0) - fprintf(stderr, Name ": error opening %s: %s\n", - dev, strerror(errno)); - else if (md_get_version(mdfd) <= 0) { - fprintf(stderr, Name ": %s does not appear to be an md device\n", - dev); - close(mdfd); - mdfd = -1; - } - return mdfd; -} - - int main(int argc, char *argv[]) { @@ -233,6 +72,7 @@ int main(int argc, char *argv[]) char *program = NULL; int delay = 0; int daemonise = 0; + char *pidfile = NULL; int oneshot = 0; int copies; @@ -420,6 +260,7 @@ int main(int argc, char *argv[]) } continue; + case O(GROW,'l'): /* hack - needed to understand layout */ case O(CREATE,'l'): case O(BUILD,'l'): /* set raid level*/ if (level != UnSet) { @@ -433,7 +274,7 @@ int main(int argc, char *argv[]) optarg); exit(2); } - if (level != 0 && level != -1 && level != 1 && level != -4 && mode == BUILD) { + if (level != 0 && level != -1 && level != 1 && level != -4 && level != -5 && mode == BUILD) { fprintf(stderr, Name ": Raid level %s not permitted with --build.\n", optarg); exit(2); @@ -447,6 +288,8 @@ int main(int argc, char *argv[]) continue; case 
O(CREATE,'p'): /* raid5 layout */ + case O(BUILD,'p'): /* faulty layout */ + case O(GROW, 'p'): /* faulty reconfig */ if (layout != UnSet) { fprintf(stderr,Name ": layout may only be sent once. " "Second value was %s\n", optarg); @@ -485,6 +328,23 @@ int main(int argc, char *argv[]) else layout = 1 + (copies<<8); break; + case -5: /* Faulty + * modeNNN + */ + + { + int ln = strcspn(optarg, "0123456789"); + char *m = strdup(optarg); + int mode; + m[ln] = 0; + mode = map_name(faultylayout, m); + if (mode == UnSet) { + fprintf(stderr, Name ": layout %s not understood for faulty.\n", + optarg); + exit(2); + } + layout = mode | (atoi(optarg+ln)<< ModeShift); + } } continue; @@ -507,7 +367,7 @@ int main(int argc, char *argv[]) optarg); exit(2); } - if (raiddisks == 1 && !force) { + if (raiddisks == 1 && !force && level != -5) { fprintf(stderr, Name ": '1' is an unusual number of drives for an array, so it is probably\n" " a mistake. If you really mean it you will need to specify --force before\n" " setting the number of drives.\n"); @@ -677,6 +537,13 @@ int main(int argc, char *argv[]) case O(MONITOR,'f'): /* daemonise */ daemonise = 1; continue; + case O(MONITOR,'i'): /* pid */ + if (pidfile) + fprintf(stderr, Name ": only specify one pid file. 
%s ignored.\n", + optarg); + else + pidfile = optarg; + continue; case O(MONITOR,'1'): /* oneshot */ oneshot = 1; continue; @@ -820,19 +687,20 @@ int main(int argc, char *argv[]) ident.super_minor == UnSet && !scan ) { /* Only a device has been given, so get details from config file */ mddev_ident_t array_ident = conf_get_ident(configfile, devlist->devname); - mdfd = open_mddev(devlist->devname, array_ident->autof); - if (mdfd < 0) + if (array_ident == NULL) { + fprintf(stderr, Name ": %s not identified in config file.\n", + devlist->devname); rv |= 1; - else { - if (array_ident == NULL) { - fprintf(stderr, Name ": %s not identified in config file.\n", - devlist->devname); + } else { + mdfd = open_mddev(devlist->devname, array_ident->autof); + if (mdfd < 0) rv |= 1; - } - else + else { rv |= Assemble(devlist->devname, mdfd, array_ident, configfile, NULL, readonly, runstop, update, verbose, force); + close(mdfd); + } } } else if (!scan) rv = Assemble(devlist->devname, mdfd, &ident, configfile, @@ -845,20 +713,21 @@ int main(int argc, char *argv[]) } for (dv = devlist ; dv ; dv=dv->next) { mddev_ident_t array_ident = conf_get_ident(configfile, dv->devname); - mdfd = open_mddev(dv->devname, array_ident->autof); - if (mdfd < 0) { - rv |= 1; - continue; - } if (array_ident == NULL) { fprintf(stderr, Name ": %s not identified in config file.\n", dv->devname); rv |= 1; continue; } + mdfd = open_mddev(dv->devname, array_ident->autof); + if (mdfd < 0) { + rv |= 1; + continue; + } rv |= Assemble(dv->devname, mdfd, array_ident, configfile, NULL, readonly, runstop, update, verbose, force); + close(mdfd); } } else { mddev_ident_t array_list = conf_get_ident(configfile, NULL); @@ -884,7 +753,7 @@ int main(int argc, char *argv[]) } break; case BUILD: - rv = Build(devlist->devname, mdfd, chunk, level, raiddisks, devlist->next, assume_clean); + rv = Build(devlist->devname, mdfd, chunk, level, layout, raiddisks, devlist->next, assume_clean); break; case CREATE: rv = 
Create(devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size, @@ -963,8 +832,13 @@ int main(int argc, char *argv[]) rv = 1; break; } + if (pidfile && !daemonise) { + fprintf(stderr, Name ": Cannot write a pid file when not in daemon mode\n"); + rv = 1; + break; + } rv= Monitor(devlist, mailaddr, program, - delay?delay:60, daemonise, scan, oneshot, configfile, test); + delay?delay:60, daemonise, scan, oneshot, configfile, test, pidfile); break; case GROW: @@ -981,12 +855,16 @@ int main(int argc, char *argv[]) if (rv) break; } - } else if (size >= 0 && raiddisks) { - fprintf(stderr, Name ": can only grow size OR raiddisks, not both\n"); + } else if ((size >= 0) + (raiddisks != 0) + (layout != UnSet) > 1) { + fprintf(stderr, Name ": can change at most one of size, raiddisks, and layout\n"); rv = 1; break; - } else + } else if (layout != UnSet) + rv = Manage_reconfig(devlist->devname, mdfd, layout); + else if (size >= 0 || raiddisks) rv = Manage_resize(devlist->devname, mdfd, size, raiddisks); + else + fprintf(stderr, Name ": no changes to --grow\n"); break; } exit(rv); diff --git a/mdadm.h b/mdadm.h index 831e2671..a816846c 100644 --- a/mdadm.h +++ b/mdadm.h @@ -150,7 +150,7 @@ extern void mdstat_wait(int seconds); extern char *map_num(mapping_t *map, int num); extern int map_name(mapping_t *map, char *name); -extern mapping_t r5layout[], pers[], modes[]; +extern mapping_t r5layout[], pers[], modes[], faultylayout[]; extern char *map_dev(int major, int minor); @@ -158,6 +158,7 @@ extern char *map_dev(int major, int minor); extern int Manage_ro(char *devname, int fd, int readonly); extern int Manage_runstop(char *devname, int fd, int runstop); extern int Manage_resize(char *devname, int fd, long long size, int raid_disks); +extern int Manage_reconfig(char *devname, int fd, int layout); extern int Manage_subdevs(char *devname, int fd, mddev_dev_t devlist); extern int Grow_Add_device(char *devname, int fd, char *newdev); @@ -171,7 +172,7 @@ extern int 
Assemble(char *mddev, int mdfd, char *update, int verbose, int force); -extern int Build(char *mddev, int mdfd, int chunk, int level, +extern int Build(char *mddev, int mdfd, int chunk, int level, int layout, int raiddisks, mddev_dev_t devlist, int assume_clean); @@ -187,7 +188,7 @@ extern int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust); extern int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config, int test); + char *config, int test, char *pidfile); extern int Kill(char *dev, int force); @@ -227,3 +228,30 @@ extern void put_md_name(char *name); extern char *get_md_name(int dev); extern char DefaultConfFile[]; + +extern int open_mddev(char *dev, int autof); + + +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + + +/* faulty stuff */ + +#define WriteTransient 0 +#define ReadTransient 1 +#define WritePersistent 2 +#define ReadPersistent 3 +#define WriteAll 4 /* doesn't go to device */ +#define ReadFixable 5 +#define Modes 6 + +#define ClearErrors 31 +#define ClearFaults 30 + +#define AllPersist 100 /* internal use only */ +#define NoPersist 101 + +#define ModeMask 0x1f +#define ModeShift 5 diff --git a/mdadm.spec b/mdadm.spec index f55d9339..621ca6ac 100644 --- a/mdadm.spec +++ b/mdadm.spec @@ -1,6 +1,6 @@ Summary: mdadm is used for controlling Linux md devices (aka RAID arrays) Name: mdadm -Version: 1.7.0 +Version: 1.8.0 Release: 1 Source: http://www.cse.unsw.edu.au/~neilb/source/mdadm/mdadm-%{version}.tgz URL: http://www.cse.unsw.edu.au/~neilb/source/mdadm/ diff --git a/mdassemble.c b/mdassemble.c index 55055dd1..43aed3c8 100644 --- a/mdassemble.c +++ b/mdassemble.c @@ -46,11 +46,16 @@ mapping_t pers[] = { { "5", 5}, { "multipath", -4}, { "mp", -4}, + { "raid6", 6}, + { "6", 6}, + { "raid10", 10}, + { "10", 10}, { NULL, 0} }; +#ifndef MDASSEMBLE_AUTO /* from mdadm.c */ -int open_mddev(char *dev) +int open_mddev(char *dev, int 
autof/*unused */) { int mdfd = open(dev, O_RDWR, 0); if (mdfd < 0) @@ -64,6 +69,7 @@ int open_mddev(char *dev) } return mdfd; } +#endif char *configfile = NULL; int rv; @@ -81,7 +87,7 @@ int main() { } else for (; array_list; array_list = array_list->next) { mdu_array_info_t array; - mdfd = open_mddev(array_list->devname); + mdfd = open_mddev(array_list->devname, array_list->autof); if (mdfd < 0) { rv |= 1; continue; diff --git a/mdopen.c b/mdopen.c new file mode 100644 index 00000000..69c4a232 --- /dev/null +++ b/mdopen.c @@ -0,0 +1,193 @@ +/* + * mdadm - manage Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2001-2002 Neil Brown + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: + * Paper: Neil Brown + * School of Computer Science and Engineering + * The University of New South Wales + * Sydney, 2052 + * Australia + */ + +#include "mdadm.h" +#include "md_p.h" +#include <ctype.h> + +void make_parts(char *dev, int cnt) +{ + /* make 'cnt' partition devices for 'dev' + * We use the major/minor from dev and add 1..cnt + * If dev ends with a digit, we add "_p%d" else "%d" + * If the name exists, we use its owner/mode, + * else that of dev + */ + struct stat stb; + int major, minor; + int i; + char *name = malloc(strlen(dev) + 20); + int dig = isdigit(dev[strlen(dev)-1]); + + if (stat(dev, &stb)!= 0) + return; + if (!S_ISBLK(stb.st_mode)) + return; + major = MAJOR(stb.st_rdev); + minor = MINOR(stb.st_rdev); + for (i=1; i <= cnt ; i++) { + struct stat stb2; + sprintf(name, "%s%s%d", dev, dig?"_p":"", i); + if (stat(name, &stb2)==0) { + if (!S_ISBLK(stb2.st_mode)) + continue; + if (stb2.st_rdev == MKDEV(major, minor+i)) + continue; + unlink(name); + } else { + stb2 = stb; + } + mknod(name, S_IFBLK | 0600, MKDEV(major, minor+i)); + chown(name, stb2.st_uid, stb2.st_gid); + chmod(name, stb2.st_mode & 07777); + } +} + +/* + * Open a given md device, and check that it really is one. + * If 'autof' is given, then we need to create, or recreate, the md device. + * If the name already exists, and is not a block device, we fail. + * If it exists and is not an md device, is not the right type (partitioned or not), + * or is currently in-use, we remove the device, but remember the owner and mode. + * If it now doesn't exist, we find a free md array and create the device.
+ * Default ownership is user=0, group=0 perm=0600 + */ +int open_mddev(char *dev, int autof) +{ + int mdfd; + struct stat stb; + int major = MD_MAJOR; + int minor; + int must_remove = 0; + struct mdstat_ent *mdlist; + int num; + + if (autof) { + /* autof is set, so we need to check that the name is ok, + * and possibly create one if not + */ + stb.st_mode = 0; + if (lstat(dev, &stb)==0 && ! S_ISBLK(stb.st_mode)) { + fprintf(stderr, Name ": %s is not a block device.\n", + dev); + return -1; + } + /* check major number is correct */ + if (autof>0) + major = get_mdp_major(); + if (stb.st_mode && MAJOR(stb.st_rdev) != major) + must_remove = 1; + if (stb.st_mode && !must_remove) { + mdu_array_info_t array; + /* looks ok, see if it is available */ + mdfd = open(dev, O_RDWR, 0); + if (mdfd < 0) { + fprintf(stderr, Name ": error opening %s: %s\n", + dev, strerror(errno)); + return -1; + } else if (md_get_version(mdfd) <= 0) { + fprintf(stderr, Name ": %s does not appear to be an md device\n", + dev); + close(mdfd); + return -1; + } + if (ioctl(mdfd, GET_ARRAY_INFO, &array)==0) { + /* already active */ + must_remove = 1; + close(mdfd); + } else { + if (autof > 0) + make_parts(dev, autof); + return mdfd; + } + } + /* Ok, need to find a minor that is not in use. + * Easiest to read /proc/mdstat, and hunt through for + * an unused number + */ + mdlist = mdstat_read(0); + for (num= (autof>0)?-1:0 ; ; num+= (autof>2)?-1:1) { + struct mdstat_ent *me; + for (me=mdlist; me; me=me->next) + if (me->devnum == num) + break; + if (!me) { + /* doesn't exist in mdstat.
+ * make sure it is new to /dev too + */ + char *dn; + if (autof > 0) + minor = (-1-num) << MdpMinorShift; + else + minor = num; + dn = map_dev(major,minor); + if (dn==NULL || is_standard(dn)) { + /* this number only used by a 'standard' name, + * so it is safe to use + */ + break; + } + } + } + /* 'num' is the number to use, >=0 for md, <0 for mdp */ + if (must_remove) { + /* never remove a device name that ends /mdNN or /dNN, + * that would be confusing + */ + if (is_standard(dev)) { + fprintf(stderr, Name ": --auto refusing to remove %s as it looks like a standard name.\n", + dev); + return -1; + } + unlink(dev); + } + + if (mknod(dev, S_IFBLK|0600, MKDEV(major, minor))!= 0) { + fprintf(stderr, Name ": failed to create %s\n", dev); + return -1; + } + if (must_remove) { + chown(dev, stb.st_uid, stb.st_gid); + chmod(dev, stb.st_mode & 07777); + } + make_parts(dev,autof); + } + mdfd = open(dev, O_RDWR, 0); + if (mdfd < 0) + fprintf(stderr, Name ": error opening %s: %s\n", + dev, strerror(errno)); + else if (md_get_version(mdfd) <= 0) { + fprintf(stderr, Name ": %s does not appear to be an md device\n", + dev); + close(mdfd); + mdfd = -1; + } + return mdfd; +} + diff --git a/misc/syslog-events b/misc/syslog-events new file mode 100644 index 00000000..fe8c14e4 --- /dev/null +++ b/misc/syslog-events @@ -0,0 +1,27 @@ +#!/bin/sh +# +# sample event handling script for mdadm +# e.g. 
mdadm --follow --program=/sbin/syslog-events --scan +# +# License: GPL ver.2 +# Copyright (C) 2004 SEKINE Tatsuo + +event="$1" +dev="$2" +disc="$3" + +facility="kern" +tag="mdmonitor" + +case x"${event}" in + xFail*) priority="error" ;; + xTest*) priority="debug" ;; + x*) priority="info" ;; +esac + +msg="${event} event on ${dev}" +if [ x"${disc}" != x ]; then + msg="${msg}, related to disc ${disc}" +fi + +exec logger -t "${tag}" -p "${facility}.${priority}" -- "${msg}" diff --git a/util.c b/util.c index 5ef59c08..121ddbb7 100644 --- a/util.c +++ b/util.c @@ -212,15 +212,23 @@ int load_super(int fd, mdp_super_t *super) * 6 - wrong major version */ unsigned long size; + unsigned long long dsize; unsigned long long offset; - if (ioctl(fd, BLKGETSIZE, &size)) - return 1; +#ifdef BLKGETSIZE64 + if (ioctl(fd, BLKGETSIZE64, &dsize) != 0) +#endif + { + if (ioctl(fd, BLKGETSIZE, &size)) + return 1; + else + dsize = size << 9; + } - if (size < MD_RESERVED_SECTORS*2) + if (dsize < MD_RESERVED_SECTORS*2) return 2; - offset = MD_NEW_SIZE_SECTORS(size); + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; @@ -242,16 +250,25 @@ int load_super(int fd, mdp_super_t *super) int store_super(int fd, mdp_super_t *super) { - long size; + unsigned long size; + unsigned long long dsize; + long long offset; - if (ioctl(fd, BLKGETSIZE, &size)) - return 1; +#ifdef BLKGETSIZE64 + if (ioctl(fd, BLKGETSIZE64, &dsize) != 0) +#endif + { + if (ioctl(fd, BLKGETSIZE, &size)) + return 1; + else + dsize = ((unsigned long long)size) << 9; + } - if (size < MD_RESERVED_SECTORS*2) + if (dsize < MD_RESERVED_SECTORS*2) return 2; - offset = MD_NEW_SIZE_SECTORS(size); + offset = MD_NEW_SIZE_SECTORS(dsize>>9); offset *= 512; -- 2.39.2