From 8382f19bdcc6d2d1de92154e11129acdcaab10fa Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 21 Dec 2006 17:10:52 +1100 Subject: [PATCH] Add new mode: --incremental --incremental allows arrays to be assembled one device at a time. This is expected to be used with udev. --- ANNOUNCE-2.6 | 0 ChangeLog | 2 + Incremental.c | 721 ++++++++++++++++++++++++++++++++++++++++++ Makefile | 8 +- Manage.c | 14 + Monitor.c | 2 +- ReadMe.c | 34 +- config.c | 18 +- kernel-patch-2.6.18 | 35 ++ kernel-patch-2.6.18.6 | 35 ++ kernel-patch-2.6.19 | 34 ++ mapfile.c | 197 ++++++++++++ mdadm.8 | 210 +++++++++++- mdadm.c | 39 +++ mdadm.h | 33 ++ mdopen.c | 48 +++ mdstat.c | 12 + super0.c | 3 + sysfs.c | 30 +- util.c | 8 + 20 files changed, 1468 insertions(+), 15 deletions(-) create mode 100644 ANNOUNCE-2.6 create mode 100644 Incremental.c create mode 100644 kernel-patch-2.6.18 create mode 100644 kernel-patch-2.6.18.6 create mode 100644 kernel-patch-2.6.19 create mode 100644 mapfile.c diff --git a/ANNOUNCE-2.6 b/ANNOUNCE-2.6 new file mode 100644 index 00000000..e69de29b diff --git a/ChangeLog b/ChangeLog index 38ae4887..3d684067 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,8 @@ Changes Prior to this release - Don't hold md device open for so long in --monitor mode - map_dev can be slow and interferes with trying to stop the array. - Support --uuid= with --create to choose your own UUID. + - New major mode "--incremental" for incremental assembly of arrays, + intended for use with udev. Changes Prior to 2.5.6 release - Fix bug which meant "bitmap=xxx" in mdadm.conf was not handled diff --git a/Incremental.c b/Incremental.c new file mode 100644 index 00000000..ebe501f7 --- /dev/null +++ b/Incremental.c @@ -0,0 +1,721 @@ +/* + * Incremental.c - support --incremental. Part of: + * mdadm - manage Linux "md" devices aka RAID arrays. 
+ * + * Copyright (C) 2006 Neil Brown + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: + * Paper: Neil Brown + * Novell Inc + * GPO Box Q1283 + * QVB Post Office, NSW 1230 + * Australia + */ + +#include "mdadm.h" + +static int count_active(struct supertype *st, int mdfd, char **availp, + struct mdinfo *info); +static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra, + int number, __u64 events, int verbose, + char *array_name); + +int Incremental(char *devname, int verbose, int runstop, + struct supertype *st, char *homehost, int autof) +{ + /* Add this device to an array, creating the array if necessary + * and starting the array if sensibe or - if runstop>0 - if possible. + * + * This has several steps: + * + * 1/ Check if device is permitted by mdadm.conf, reject if not. + * 2/ Find metadata, reject if none appropriate (check + * version/name from args) + * 3/ Check if there is a match in mdadm.conf + * 3a/ if not, check for homehost match. If no match, reject. + * 4/ Determine device number. + * - If in mdadm.conf with std name, use that + * - UUID in /var/run/mdadm.map use that + * - If name is suggestive, use that. unless in use with different uuid. + * - Choose a free, high number. 
+ * - Use a partitioned device unless strong suggestion not to. + * e.g. auto=md + * 5/ Find out if array already exists + * 5a/ if it does not + * - choose a name, from mdadm.conf or 'name' field in array. + * - create the array + * - add the device + * 5b/ if it does + * - check one drive in array to make sure metadata is a reasonably + * close match. Reject if not (e.g. different type) + * - add the device + * 6/ Make sure /var/run/mdadm.map contains this array. + * 7/ Is there enough devices to possibly start the array? + * 7a/ if not, finish with success. + * 7b/ if yes, + * - read all metadata and arrange devices like -A does + * - if number of OK devices match expected, or -R and there are enough, + * start the array (auto-readonly). + */ + struct stat stb; + void *super, *super2; + struct mdinfo info, info2; + struct mddev_ident_s *array_list, *match; + char chosen_name[1024]; + int rv; + int devnum; + struct map_ent *mp, *map = NULL; + int dfd, mdfd; + char *avail; + int active_disks; + + + struct createinfo *ci = conf_get_create_info(); + + if (autof == 0) + autof = ci->autof; + + /* 1/ Check if devices is permitted by mdadm.conf */ + + if (!conf_test_dev(devname)) { + if (verbose >= 0) + fprintf(stderr, Name + ": %s not permitted by mdadm.conf.\n", + devname); + return 1; + } + + /* 2/ Find metadata, reject if none appropriate (check + * version/name from args) */ + + dfd = dev_open(devname, O_RDONLY|O_EXCL); + if (dfd < 0) { + if (verbose >= 0) + fprintf(stderr, Name ": cannot open %s: %s.\n", + devname, strerror(errno)); + return 1; + } + if (fstat(dfd, &stb) < 0) { + if (verbose >= 0) + fprintf(stderr, Name ": fstat failed for %s: %s.\n", + devname, strerror(errno)); + close(dfd); + return 1; + } + if ((stb.st_mode & S_IFMT) != S_IFBLK) { + if (verbose >= 0) + fprintf(stderr, Name ": %s is not a block device.\n", + devname); + close(dfd); + return 1; + } + + if (st == NULL && (st = guess_super(dfd)) == NULL) { + if (verbose >= 0) + fprintf(stderr, 
Name + ": no recognisable superblock on %s.\n", + devname); + close(dfd); + return 1; + } + if (st->ss->load_super(st, dfd, &super, NULL)) { + if (verbose >= 0) + fprintf(stderr, Name ": no RAID superblock on %s.\n", + devname); + close(dfd); + return 1; + } + st->ss->getinfo_super(&info, super); + close (dfd); + + /* 3/ Check if there is a match in mdadm.conf */ + + array_list = conf_get_ident(NULL); + match = NULL; + for (; array_list; array_list = array_list->next) { + if (array_list->uuid_set && + same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid) + == 0) { + if (verbose >= 2) + fprintf(stderr, Name + ": UUID differs from %s.\n", + array_list->devname); + continue; + } + if (array_list->name[0] && + strcasecmp(array_list->name, info.name) != 0) { + if (verbose >= 2) + fprintf(stderr, Name + ": Name differs from %s.\n", + array_list->devname); + continue; + } + if (array_list->devices && + !match_oneof(array_list->devices, devname)) { + if (verbose >= 2) + fprintf(stderr, Name + ": Not a listed device for %s.\n", + array_list->devname); + continue; + } + if (array_list->super_minor != UnSet && + array_list->super_minor != info.array.md_minor) { + if (verbose >= 2) + fprintf(stderr, Name + ": Different super-minor to %s.\n", + array_list->devname); + continue; + } + if (!array_list->uuid_set && + !array_list->name[0] && + !array_list->devices && + array_list->super_minor == UnSet) { + if (verbose >= 2) + fprintf(stderr, Name + ": %s doesn't have any identifying information.\n", + array_list->devname); + continue; + } + /* FIXME, should I check raid_disks and level too?? */ + + if (match) { + if (verbose >= 0) + fprintf(stderr, Name + ": we match both %s and %s - cannot decide which to use.\n", + match->devname, array_list->devname); + return 2; + } + match = array_list; + } + + /* 3a/ if not, check for homehost match. If no match, reject. 
*/ + if (!match) { + if (homehost == NULL || + st->ss->match_home(super, homehost) == 0) { + if (verbose >= 0) + fprintf(stderr, Name + ": not found in mdadm.conf and not identified by homehost.\n"); + return 2; + } + } + /* 4/ Determine device number. */ + /* - If in mdadm.conf with std name, use that */ + /* - UUID in /var/run/mdadm.map use that */ + /* - If name is suggestive, use that. unless in use with */ + /* different uuid. */ + /* - Choose a free, high number. */ + /* - Use a partitioned device unless strong suggestion not to. */ + /* e.g. auto=md */ + if (match && is_standard(match->devname, &devnum)) + /* We have devnum now */; + else if ((mp = map_by_uuid(&map, info.uuid)) != NULL) + devnum = mp->devnum; + else { + /* Have to guess a bit. */ + int use_partitions = 1; + char *np, *ep; + if ((autof&7) == 3 || (autof&7) == 5) + use_partitions = 0; + np = strchr(info.name, ':'); + if (np) + np++; + else + np = info.name; + devnum = strtoul(np, &ep, 10); + if (ep > np && *ep == 0) { + /* This is a number. Let check that it is unused. */ + if (mddev_busy(use_partitions ? (-1-devnum) : devnum)) + devnum = -1; + } else + devnum = -1; + + if (devnum < 0) { + /* Haven't found anything yet, choose something free */ + /* There is similar code in mdopen.c - should unify */ + for (devnum = 127 ; devnum != 128 ; + devnum = devnum ? devnum-1 : (1<<22)-1) { + if (mddev_busy(use_partitions ? + (-1-devnum) : devnum)) + break; + } + if (devnum == 128) { + fprintf(stderr, Name + ": No spare md devices!!\n"); + return 2; + } + } + devnum = use_partitions ? (-1-devnum) : devnum; + } + mdfd = open_mddev_devnum(match ? match->devname : NULL, + devnum, + info.name, + chosen_name); + if (mdfd < 0) { + fprintf(stderr, Name ": failed to open %s: %s.\n", + chosen_name, strerror(errno)); + return 2; + } + /* 5/ Find out if array already exists */ + if (! mddev_busy(devnum)) { + /* 5a/ if it does not */ + /* - choose a name, from mdadm.conf or 'name' field in array. 
*/ + /* - create the array */ + /* - add the device */ + mdu_array_info_t ainf; + mdu_disk_info_t disk; + char md[20]; + struct sysarray *sra; + + memset(&ainf, 0, sizeof(ainf)); + ainf.major_version = st->ss->major; + ainf.minor_version = st->minor_version; + if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) { + fprintf(stderr, Name + ": SET_ARRAY_INFO failed for %s: %s\b", + chosen_name, strerror(errno)); + close(mdfd); + return 2; + } + sprintf(md, "%d.%d\n", st->ss->major, st->minor_version); + sra = sysfs_read(mdfd, devnum, GET_VERSION); + sysfs_set_str(sra, NULL, "metadata_version", md); + memset(&disk, 0, sizeof(disk)); + disk.major = major(stb.st_rdev); + disk.minor = minor(stb.st_rdev); + sysfs_free(sra); + if (ioctl(mdfd, ADD_NEW_DISK, &disk) != 0) { + fprintf(stderr, Name ": failed to add %s to %s: %s.\n", + devname, chosen_name, strerror(errno)); + ioctl(mdfd, STOP_ARRAY, 0); + close(mdfd); + return 2; + } + sra = sysfs_read(mdfd, devnum, GET_DEVS); + if (!sra || !sra->devs || sra->devs->role >= 0) { + /* It really should be 'none' - must be old buggy + * kernel, and mdadm -I may not be able to complete. + * So reject it. + */ + ioctl(mdfd, STOP_ARRAY, NULL); + fprintf(stderr, Name + ": You have an old buggy kernel which cannot support\n" + " --incremental reliably. Aborting.\n"); + close(mdfd); + sysfs_free(sra); + return 2; + } + } else { + /* 5b/ if it does */ + /* - check one drive in array to make sure metadata is a reasonably */ + /* close match. Reject if not (e.g. 
different type) */ + /* - add the device */ + char dn[20]; + int dfd2; + mdu_disk_info_t disk; + int err; + struct sysarray *sra; + sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS | + GET_STATE)); + if (sra->major_version != st->ss->major || + sra->minor_version != st->minor_version) { + if (verbose >= 0) + fprintf(stderr, Name + ": %s has different metadata to chosen array %s %d.%d %d.%d.\n", + devname, chosen_name, + sra->major_version, sra->minor_version, + st->ss->major, st->minor_version); + close(mdfd); + return 1; + } + sprintf(dn, "%d:%d", sra->devs->major, sra->devs->minor); + dfd2 = dev_open(dn, O_RDONLY); + if (st->ss->load_super(st, dfd2,&super2, NULL)) { + fprintf(stderr, Name + ": Strange error loading metadata for %s.\n", + chosen_name); + close(mdfd); + close(dfd2); + return 2; + } + close(dfd2); + st->ss->getinfo_super(&info2, super2); + if (info.array.level != info2.array.level || + memcmp(info.uuid, info2.uuid, 16) != 0 || + info.array.raid_disks != info2.array.raid_disks) { + fprintf(stderr, Name + ": unexpected difference between %s and %s.\n", + chosen_name, devname); + close(mdfd); + return 2; + } + memset(&disk, 0, sizeof(disk)); + disk.major = major(stb.st_rdev); + disk.minor = minor(stb.st_rdev); + err = ioctl(mdfd, ADD_NEW_DISK, &disk); + if (err < 0 && errno == EBUSY) { + /* could be another device present with the same + * disk.number. Find and reject any such + */ + find_reject(mdfd, st, sra, info.disk.number, + info.events, verbose, chosen_name); + err = ioctl(mdfd, ADD_NEW_DISK, &disk); + } + if (err < 0) { + fprintf(stderr, Name ": failed to add %s to %s: %s.\n", + devname, chosen_name, strerror(errno)); + close(mdfd); + return 2; + } + } + /* 6/ Make sure /var/run/mdadm.map contains this array. */ + map_update(&map, devnum, + info.array.major_version, + info.array.minor_version, + info.uuid, chosen_name); + + /* 7/ Is there enough devices to possibly start the array? */ + /* 7a/ if not, finish with success. 
*/ + active_disks = count_active(st, mdfd, &avail, &info); + if (enough(info.array.level, info.array.raid_disks, + info.array.layout, info.array.state & 1, + avail, active_disks) == 0) { + free(avail); + if (verbose >= 0) + fprintf(stderr, Name + ": %s attached to %s, not enough to start (%d).\n", + devname, chosen_name, active_disks); + close(mdfd); + return 0; + } + free(avail); + + /* 7b/ if yes, */ + /* - if number of OK devices match expected, or -R and there */ + /* are enough, */ + /* + add any bitmap file */ + /* + start the array (auto-readonly). */ +{ + mdu_array_info_t ainf; + + if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) { + if (verbose >= 0) + fprintf(stderr, Name + ": %s attached to %s which is already active.\n", + devname, chosen_name); + close (mdfd); + return 0; + } +} + if (runstop > 0 || active_disks >= info.array.working_disks) { + struct sysarray *sra; + /* Let's try to start it */ + if (match && match->bitmap_file) { + int bmfd = open(match->bitmap_file, O_RDWR); + if (bmfd < 0) { + fprintf(stderr, Name + ": Could not open bitmap file %s.\n", + match->bitmap_file); + close(mdfd); + return 1; + } + if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) { + close(bmfd); + fprintf(stderr, Name + ": Failed to set bitmapfile for %s.\n", + chosen_name); + close(mdfd); + return 1; + } + close(bmfd); + } + sra = sysfs_read(mdfd, devnum, 0); + if (sra == NULL || active_disks >= info.array.working_disks) + rv = ioctl(mdfd, RUN_ARRAY, NULL); + else + rv = sysfs_set_str(sra, NULL, + "array_state", "read-auto"); + if (rv == 0) { + if (verbose >= 0) + fprintf(stderr, Name + ": %s attached to %s, which has been started.\n", + devname, chosen_name); + rv = 0; + } else { + fprintf(stderr, Name + ": %s attached to %s, but failed to start: %s.\n", + devname, chosen_name, strerror(errno)); + rv = 1; + } + } else { + if (verbose >= 0) + fprintf(stderr, Name + ": %s attached to %s, not enough to start safely.\n", + devname, chosen_name); + rv = 0; + } + close(mdfd); + 
return rv; +} + +static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra, + int number, __u64 events, int verbose, + char *array_name) +{ + /* Find an device attached to this array with a disk.number of number + * and events less than the passed events, and remove the device. + */ + struct sysdev *d; + mdu_array_info_t ra; + + if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0) + return; /* not safe to remove from active arrays + * without thinking more */ + + for (d = sra->devs; d ; d = d->next) { + char dn[10]; + int dfd; + void *super; + struct mdinfo info; + sprintf(dn, "%d:%d", d->major, d->minor); + dfd = dev_open(dn, O_RDONLY); + if (dfd < 0) + continue; + if (st->ss->load_super(st, dfd, &super, NULL)) { + close(dfd); + continue; + } + st->ss->getinfo_super(&info, super); + free(super); + close(dfd); + + if (info.disk.number != number || + info.events >= events) + continue; + + if (d->role > -1) + sysfs_set_str(sra, d, "slot", "none"); + if (sysfs_set_str(sra, d, "state", "remove") == 0) + if (verbose >= 0) + fprintf(stderr, Name + ": removing old device %s from %s\n", + d->name+4, array_name); + } +} + +static int count_active(struct supertype *st, int mdfd, char **availp, + struct mdinfo *bestinfo) +{ + /* count how many devices in sra think they are active */ + struct sysdev *d; + int cnt = 0, cnt1 = 0; + __u64 max_events = 0; + void *best_super = NULL; + struct sysarray *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE); + char *avail = NULL; + + for (d = sra->devs ; d ; d = d->next) { + char dn[30]; + int dfd; + void *super; + int ok; + struct mdinfo info; + + sprintf(dn, "%d:%d", d->major, d->minor); + dfd = dev_open(dn, O_RDONLY); + if (dfd < 0) + continue; + ok = st->ss->load_super(st, dfd, &super, NULL); + close(dfd); + if (ok != 0) + continue; + st->ss->getinfo_super(&info, super); + if (info.disk.state & (1<ss->getinfo_super(bestinfo,best_super); + free(best_super); + } + return cnt + cnt1; +} + +void RebuildMap(void) +{ + struct 
mdstat_ent *mdstat = mdstat_read(0, 0); + struct mdstat_ent *md; + struct map_ent *map = NULL; + int mdp = get_mdp_major(); + + for (md = mdstat ; md ; md = md->next) { + struct sysarray *sra = sysfs_read(-1, md->devnum, GET_DEVS); + struct sysdev *sd; + + for (sd = sra->devs ; sd ; sd = sd->next) { + char dn[30]; + int dfd; + int ok; + struct supertype *st; + char *path; + void *super; + struct mdinfo info; + + sprintf(dn, "%d:%d", sd->major, sd->minor); + dfd = dev_open(dn, O_RDONLY); + if (dfd < 0) + continue; + st = guess_super(dfd); + if ( st == NULL) + ok = -1; + else + ok = st->ss->load_super(st, dfd, &super, NULL); + close(dfd); + if (ok != 0) + continue; + st->ss->getinfo_super(&info, super); + if (md->devnum > 0) + path = map_dev(MD_MAJOR, md->devnum, 0); + else + path = map_dev(mdp, (-1-md->devnum)<< 6, 0); + map_add(&map, md->devnum, st->ss->major, + st->minor_version, + info.uuid, path ? : "/unknown"); + free(super); + break; + } + } + map_write(map); + map_free(map); +} + +int IncrementalScan(int verbose) +{ + /* look at every device listed in the 'map' file. + * If one is found that is not running then: + * look in mdadm.conf for bitmap file. + * if one exists, but array has none, add it. + * try to start array in auto-readonly mode + */ + struct map_ent *mapl = NULL; + struct map_ent *me; + mddev_ident_t devs, mddev; + int rv = 0; + + map_read(&mapl); + devs = conf_get_ident(NULL); + + for (me = mapl ; me ; me = me->next) { + char path[1024]; + mdu_array_info_t array; + mdu_bitmap_file_t bmf; + struct sysarray *sra; + int mdfd = open_mddev_devnum(me->path, me->devnum, NULL, path); + if (mdfd < 0) + continue; + if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 || + errno != ENODEV) { + close(mdfd); + continue; + } + /* Ok, we can try this one. 
Maybe it needs a bitmap */ + for (mddev = devs ; mddev ; mddev = mddev->next) + if (strcmp(mddev->devname, me->path) == 0) + break; + if (mddev && mddev->bitmap_file) { + /* + * Note: early kernels will wrongly fail this, so it + * is a hint only + */ + int added = -1; + if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) { + int bmfd = open(mddev->bitmap_file, O_RDWR); + if (bmfd >= 0) { + added = ioctl(mdfd, SET_BITMAP_FILE, + bmfd); + close(bmfd); + } + } + if (verbose >= 0) { + if (added == 0) + fprintf(stderr, Name + ": Added bitmap %s to %s\n", + mddev->bitmap_file, me->path); + else if (errno != EEXIST) + fprintf(stderr, Name + ": Failed to add bitmap to %s: %s\n", + me->path, strerror(errno)); + } + } + sra = sysfs_read(mdfd, 0, 0); + if (sra) { + if (sysfs_set_str(sra, NULL, + "array_state", "read-auto") == 0) { + if (verbose >= 0) + fprintf(stderr, Name + ": started array %s\n", + me->path); + } else { + fprintf(stderr, Name + ": failed to start array %s: %s\n", + me->path, strerror(errno)); + rv = 1; + } + } + } + return rv; +} diff --git a/Makefile b/Makefile index 79aa88f5..112f3da1 100644 --- a/Makefile +++ b/Makefile @@ -68,10 +68,14 @@ MAN8DIR = $(MANDIR)/man8 OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ - mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o + Incremental.o \ + mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o \ + mapfile.o SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \ Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \ - mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c + Incremental.c \ + mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c \ + mapfile.c STATICSRC = pwgr.c STATICOBJS = pwgr.o diff --git a/Manage.c b/Manage.c index 4b5ec806..91934822 100644 --- a/Manage.c +++ b/Manage.c @@ -106,7 +106,11 @@ int Manage_runstop(char *devname, int 
fd, int runstop, int quiet) devname, strerror(errno)); return 1; } + if (quiet <= 0) + fprintf(stderr, Name ": started %s\n", devname); } else if (runstop < 0){ + struct map_ent *map = NULL; + struct stat stb; if (ioctl(fd, STOP_ARRAY, NULL)) { if (quiet==0) fprintf(stderr, Name ": fail to stop array %s: %s\n", @@ -115,6 +119,16 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet) } if (quiet <= 0) fprintf(stderr, Name ": stopped %s\n", devname); + if (fstat(fd, &stb) == 0) { + int devnum; + if (major(stb.st_rdev) == MD_MAJOR) + devnum = minor(stb.st_rdev); + else + devnum = -1-(minor(stb.st_rdev)>>6); + map_delete(&map, devnum); + map_write(map); + map_free(map); + } } return 0; } diff --git a/Monitor.c b/Monitor.c index 6a4c8dec..213e58d6 100644 --- a/Monitor.c +++ b/Monitor.c @@ -601,7 +601,7 @@ int Wait(char *dev) if (major(stb.st_rdev) == MD_MAJOR) devnum = minor(stb.st_rdev); else - devnum = -minor(stb.st_rdev)/16; + devnum = -1-(minor(stb.st_rdev)/64); while(1) { struct mdstat_ent *ms = mdstat_read(1, 0); diff --git a/ReadMe.c b/ReadMe.c index 739b3660..7e39c854 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -91,8 +91,9 @@ char Version[] = Name " - v2.5.6 - 9 November 2006\n"; * At the time if writing, there is only minimal support. 
*/ -char short_options[]="-ABCDEFGQhVXWvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:"; -char short_bitmap_auto_options[]="-ABCDEFGQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:"; +char short_options[]="-ABCDEFGIQhVXWvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:"; +char short_bitmap_auto_options[]= + "-ABCDEFGIQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:"; struct option long_options[] = { {"manage", 0, 0, '@'}, @@ -104,6 +105,7 @@ struct option long_options[] = { {"examine", 0, 0, 'E'}, {"follow", 0, 0, 'F'}, {"grow", 0, 0, 'G'}, + {"incremental",0,0, 'I'}, {"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */ {"query", 0, 0, 'Q'}, {"examine-bitmap", 0, 0, 'X'}, @@ -179,7 +181,9 @@ struct option long_options[] = { {"syslog", 0, 0, 'y'}, /* For Grow */ {"backup-file", 1,0, BackupFile}, - + + /* For Incremental */ + {"rebuild-map", 0, 0, 'r'}, {0, 0, 0, 0} }; @@ -201,6 +205,10 @@ char Help[] = " make changes to an existing array.\n" " mdadm --misc options... devices\n" " report on or modify various md related devices.\n" +" mdadm --grow options device\n" +" resize/reshape an active array\n" +" mdadm --incremental device\n" +" add a device to an array as appropriate\n" " mdadm --monitor options...\n" " Monitor one or more array for significant changes.\n" " mdadm device options...\n" @@ -240,6 +248,8 @@ char OptionHelp[] = " --examine -E : Examine superblock on an array component\n" " --examine-bitmap -X: Display the detail of a bitmap file\n" " --monitor -F : monitor (follow) some arrays\n" +" --grow -G : resize/ reshape and array\n" +" --incremental -I : add a single device to an array as appropriate\n" " --query -Q : Display general information about how a\n" " device relates to the md driver\n" ; @@ -506,7 +516,22 @@ char Help_grow[] = " : array.\n" ; - +char Help_incr[] = +"Usage: mdadm --incremental [-Rqrs] device\n" +"\n" +"This usage allows for incremental assembly of md arrays. Devices can be\n" +"added one at a time as they are discovered. 
Once an array has all expected\n" +"devices, it will be started.\n" +"\n" +"Options that are valid with incremental assembly (-I --incremental) more are:\n" +" --run -R : run arrays as soon as a minimal number of devices are\n" +" : present rather than waiting for all expected.\n" +" --quiet -q : Don't print any information messages, just errors.\n" +" --rebuild -r : Rebuild the 'map' file that mdadm uses for tracking\n" +" : partial arrays.\n" +" --scan -s : Use with -R to start any arrays that have the minimal\n" +" : required number of devices, but are not yet started.\n" +; char Help_config[] = "The /etc/mdadm.conf config file:\n\n" @@ -590,6 +615,7 @@ mapping_t modes[] = { { "misc", MISC}, { "monitor", MONITOR}, { "grow", GROW}, + { "incremental", INCREMENTAL}, }; mapping_t faultylayout[] = { diff --git a/config.c b/config.c index 7101c3b0..73031b73 100644 --- a/config.c +++ b/config.c @@ -86,7 +86,7 @@ char *keywords[] = { [Mailaddr] = "mailaddr", [Mailfrom] = "mailfrom", [Program] = "program", - [CreateDev] = "create", + [CreateDev]= "create", [Homehost] = "homehost", [LTEnd] = NULL }; @@ -747,6 +747,22 @@ mddev_dev_t conf_get_devs() return dlist; } +int conf_test_dev(char *devname) +{ + struct conf_dev *cd; + if (cdevlist == NULL) + /* allow anything by default */ + return 1; + for (cd = cdevlist ; cd ; cd = cd->next) { + if (strcasecmp(cd->name, "partitions") == 0) + return 1; + if (fnmatch(cd->name, devname, FNM_PATHNAME) == 0) + return 1; + } + return 0; +} + + int match_oneof(char *devices, char *devname) { /* check if one of the comma separated patterns in devices diff --git a/kernel-patch-2.6.18 b/kernel-patch-2.6.18 new file mode 100644 index 00000000..87496ea2 --- /dev/null +++ b/kernel-patch-2.6.18 @@ -0,0 +1,35 @@ + +### Diffstat output + ./drivers/md/md.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff .prev/drivers/md/md.c ./drivers/md/md.c +--- .prev/drivers/md/md.c 2006-10-23 10:26:37.000000000 +1000 ++++ ./drivers/md/md.c 
2006-12-21 16:28:29.000000000 +1100 +@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char + else { + mddev_t *mddev = rdev->mddev; + kick_rdev_from_array(rdev); +- md_update_sb(mddev); ++ if (mddev->pers) ++ md_update_sb(mddev); + md_new_event(mddev); + err = 0; + } +@@ -1994,6 +1995,8 @@ static mdk_rdev_t *md_import_device(dev_ + kobject_init(&rdev->kobj); + + rdev->desc_nr = -1; ++ rdev->saved_raid_disk = -1; ++ rdev->raid_disk = -1; + rdev->flags = 0; + rdev->data_offset = 0; + rdev->sb_events = 0; +@@ -3991,6 +3994,7 @@ static int set_array_info(mddev_t * mdde + mddev->major_version = info->major_version; + mddev->minor_version = info->minor_version; + mddev->patch_version = info->patch_version; ++ mddev->persistent = ! info->not_persistent; + return 0; + } + mddev->major_version = MD_MAJOR_VERSION; diff --git a/kernel-patch-2.6.18.6 b/kernel-patch-2.6.18.6 new file mode 100644 index 00000000..e702e14a --- /dev/null +++ b/kernel-patch-2.6.18.6 @@ -0,0 +1,35 @@ +Signed-off-by: Neil Brown + +### Diffstat output + ./drivers/md/md.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff .prev/drivers/md/md.c ./drivers/md/md.c +--- .prev/drivers/md/md.c 2006-12-21 17:08:23.000000000 +1100 ++++ ./drivers/md/md.c 2006-12-21 17:08:26.000000000 +1100 +@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char + else { + mddev_t *mddev = rdev->mddev; + kick_rdev_from_array(rdev); +- md_update_sb(mddev); ++ if (mddev->pers) ++ md_update_sb(mddev); + md_new_event(mddev); + err = 0; + } +@@ -1995,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_ + + rdev->desc_nr = -1; + rdev->saved_raid_disk = -1; ++ rdev->raid_disk = -1; + rdev->flags = 0; + rdev->data_offset = 0; + rdev->sb_events = 0; +@@ -3993,6 +3995,7 @@ static int set_array_info(mddev_t * mdde + mddev->major_version = info->major_version; + mddev->minor_version = info->minor_version; + mddev->patch_version = info->patch_version; ++ mddev->persistent = ! 
info->not_persistent; + return 0; + } + mddev->major_version = MD_MAJOR_VERSION; diff --git a/kernel-patch-2.6.19 b/kernel-patch-2.6.19 new file mode 100644 index 00000000..22a67a39 --- /dev/null +++ b/kernel-patch-2.6.19 @@ -0,0 +1,34 @@ + +### Diffstat output + ./drivers/md/md.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff .prev/drivers/md/md.c ./drivers/md/md.c +--- .prev/drivers/md/md.c 2006-12-21 15:55:01.000000000 +1100 ++++ ./drivers/md/md.c 2006-12-21 16:28:09.000000000 +1100 +@@ -1792,7 +1792,8 @@ state_store(mdk_rdev_t *rdev, const char + else { + mddev_t *mddev = rdev->mddev; + kick_rdev_from_array(rdev); +- md_update_sb(mddev, 1); ++ if (mddev->pers) ++ md_update_sb(mddev, 1); + md_new_event(mddev); + err = 0; + } +@@ -2004,6 +2005,7 @@ static mdk_rdev_t *md_import_device(dev_ + + rdev->desc_nr = -1; + rdev->saved_raid_disk = -1; ++ rdev->raid_disk = -1; + rdev->flags = 0; + rdev->data_offset = 0; + rdev->sb_events = 0; +@@ -3977,6 +3979,7 @@ static int set_array_info(mddev_t * mdde + mddev->major_version = info->major_version; + mddev->minor_version = info->minor_version; + mddev->patch_version = info->patch_version; ++ mddev->persistent = ! info->not_persistent; + return 0; + } + mddev->major_version = MD_MAJOR_VERSION; diff --git a/mapfile.c b/mapfile.c new file mode 100644 index 00000000..746073d0 --- /dev/null +++ b/mapfile.c @@ -0,0 +1,197 @@ +/* + * mapfile - manage /var/run/mdadm.map. Part of: + * mdadm - manage Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2006 Neil Brown + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: + * Paper: Neil Brown + * Novell Inc + * GPO Box Q1283 + * QVB Post Office, NSW 1230 + * Australia + */ + +/* /var/run/mdadm.map is used to track arrays being created in --incremental + * more. It particularly allows lookup from UUID to array device, but + * also allows the array device name to be easily found. + * + * The map file is line based with space separated fields. The fields are: + * Device id - mdX or mdpX where is a number. + * metadata - 0.90 1.0 1.1 1.2 + * UUID - uuid of the array + * path - path where device created: /dev/md/home + * + */ + + +#include "mdadm.h" + + +int map_write(struct map_ent *mel) +{ + FILE *f; + int err; + int subdir = 1; + + f = fopen("/var/run/mdadm/map.new", "w"); + if (!f) { + f = fopen("/var/run/mdadm.map.new", "w"); + subdir = 1; + } + if (!f) + return 0; + while (mel) { + if (mel->devnum < 0) + fprintf(f, "mdp%d ", -1-mel->devnum); + else + fprintf(f, "md%d ", mel->devnum); + fprintf(f, "%d.%d ", mel->major, mel->minor); + fprintf(f, "%08x:%08x:%08x:%08x ", mel->uuid[0], + mel->uuid[1], mel->uuid[2], mel->uuid[3]); + fprintf(f, "%s\n", mel->path); + mel = mel->next; + } + fflush(f); + err = ferror(f); + fclose(f); + if (err) { + if (subdir) + unlink("/var/run/mdadm/map.new"); + else + unlink("/var/run/mdadm.map.new"); + return 0; + } + if (subdir) + return rename("/var/run/mdadm/map.new", + "/var/run/mdadm/map") == 0; + else + return rename("/var/run/mdadm.map.new", + "/var/run/mdadm.map") == 0; +} + +void map_add(struct 
map_ent **melp, + int devnum, int major, int minor, int uuid[4], char *path) +{ + struct map_ent *me = malloc(sizeof(*me)); + + me->devnum = devnum; + me->major = major; + me->minor = minor; + memcpy(me->uuid, uuid, 16); + me->path = strdup(path); + me->next = *melp; + *melp = me; +} + +void map_read(struct map_ent **melp) +{ + FILE *f; + char buf[8192]; + char path[200]; + int devnum, major, minor, uuid[4]; + char nam[4]; + + *melp = NULL; + + f = fopen("/var/run/mdadm/map", "r"); + if (!f) + f = fopen("/var/run/mdadm.map", "r"); + if (!f) + return; + + while (fgets(buf, sizeof(buf), f)) { + if (sscanf(buf, " md%1[p]%d %d.%d %x:%x:%x:%x %200s", + nam, &devnum, &major, &minor, uuid, uuid+1, + uuid+2, uuid+3, path) == 9) { + if (nam[0] == 'p') + devnum = -1 - devnum; + map_add(melp, devnum, major, minor, uuid, path); + } + } + fclose(f); +} + +void map_free(struct map_ent *map) +{ + while (map) { + struct map_ent *mp = map; + map = mp->next; + free(mp->path); + free(mp); + } +} + +int map_update(struct map_ent **mpp, int devnum, int major, int minor, + int *uuid, char *path) +{ + struct map_ent *map, *mp; + int rv; + + if (mpp && *mpp) + map = *mpp; + else + map_read(&map); + + for (mp = map ; mp ; mp=mp->next) + if (mp->devnum == devnum) { + mp->major = major; + mp->minor = minor; + memcpy(mp->uuid, uuid, 16); + free(mp->path); + mp->path = strdup(path); + break; + } + if (!mp) + map_add(&map, devnum, major, minor, uuid, path); + *mpp = NULL; + rv = map_write(map); + map_free(map); + return rv; +} + +void map_delete(struct map_ent **mapp, int devnum) +{ + struct map_ent *mp; + + if (*mapp == NULL) + map_read(mapp); + + for (mp = *mapp; mp; mp = *mapp) { + if (mp->devnum == devnum) { + *mapp = mp->next; + free(mp->path); + free(mp); + } else + mapp = & mp->next; + } +} + +struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]) +{ + struct map_ent *mp; + if (!*map) + map_read(map); + + for (mp = *map ; mp ; mp = mp->next) + if (memcmp(uuid, mp->uuid, 16) == 0) 
+ return mp; + return NULL; + +} diff --git a/mdadm.8 b/mdadm.8 index 61604627..c00f5faf 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -88,7 +88,7 @@ provides a layer over a true device that can be used to inject faults. '''with a different format and a different purpose. .SH MODES -mdadm has 7 major modes of operation: +mdadm has several major modes of operation: .TP .B Assemble Assemble the parts of a previously created @@ -131,6 +131,16 @@ Currently supported growth options including changing the active size of component devices in RAID level 1/4/5/6 and changing the number of active devices in RAID1. +.TP +.B "Incremental Assembly" +Add a single device to an appropriate array. If the addition of the +device makes the array runnable, the array will be started. +This provides a convenient interface to a +.I hot-plug +system. As each device is detected, +.I mdadm +has a chance to include it in some array as appropriate. + .TP .B Manage This is for doing things to specific components of an array such as @@ -169,6 +179,11 @@ mode. .TP .BR -G ", " --grow Change the size or shape of an active array. + +.TP +.BR -I ", " --incremental +Add a single device into an appropriate array, and possibly start the array. + .P If a device is given before any options, or if the first option is .BR --add , @@ -939,6 +954,32 @@ activity to finish before returning. will return with success if it actually waited for every device listed, otherwise it will return failure. +.SH For Incremental Assembly mode: +.TP +.BR --rebuild-map ", " -r +Rebuild the map file +.RB ( /var/run/mdadm/map ) +that +.I mdadm +uses to help track which arrays are currently being assembled. + +.TP +.BR --run ", " -R +Run any array assembled as soon as a minimal number of devices are +available, rather than waiting until all expected devices are present.
+ +.TP +.BR --scan ", " -s +Only meaningful with +.B -R +this will scan the +.B map +file for arrays that are being incrementally assembled and will try to +start any that are not already started. If any such array is listed +in +.B mdadm.conf +as requiring an external bitmap, that bitmap will be attached first. + .SH For Monitor mode: .TP .BR -m ", " --mail @@ -1680,6 +1721,153 @@ can be added. Note that if you add a bitmap stored in a file which is in a filesystem that is on the raid array being affected, the system will deadlock. The bitmap must be on a separate filesystem. +.SH INCREMENTAL MODE + +.HP 12 +Usage: +.B mdadm --incremental +.RB [ --run ] +.RB [ --quiet ] +.I component-device +.HP 12 +Usage: +.B mdadm --incremental --rebuild +.HP 12 +Usage: +.B mdadm --incremental --run --scan + + +.PP +This mode is designed to be used in conjunction with a device +discovery system. As devices are found in a system, they can be +passed to +.B "mdadm --incremental" +to be conditionally added to an appropriate array. + +.I mdadm +performs a number of tests to determine if the device is part of an +array, and which array it should be part of. If an appropriate array +is found, or can be created, +.I mdadm +adds the device to the array and conditionally starts the array. + +Note that +.I mdadm +will only add devices to an array which were previously working +(active or spare) parts of that array. It does not currently support +automatic inclusion of a new drive as a spare in some array. + +.B "mdadm --incremental" +requires a bug, present in all kernels through 2.6.19, to be fixed. +Hopefully this will be fixed in 2.6.20. Alternatively, apply the patch +which is included with the mdadm source distribution. If +.I mdadm +detects that this bug is present, it will abort any attempt to use +.BR --incremental . + +The tests that +.I mdadm +makes are as follows: +.IP + +Is the device permitted by +.BR mdadm.conf ? +That is, is it listed in a +.B DEVICES +line in that file.
If +.B DEVICES +is absent then the default is to allow any device. Similarly, if +.B DEVICES +contains the special word +.B partitions +then any device is allowed. Otherwise the device name given to +.I mdadm +must match one of the names or patterns in a +.B DEVICES +line. + +.IP + +Does the device have a valid md superblock? If a specific metadata +version is requested with +.B --metadata +or +.B -e +then only that style of metadata is accepted, otherwise +.I mdadm +finds any known version of metadata. If no +.I md +metadata is found, the device is rejected. + +.IP + +Does the metadata match an expected array? +The metadata can match in two ways. Either there is an array listed +in +.B mdadm.conf +which identifies the array (either by UUID, by name, by device list, +or by minor-number), or the array was created with a +.B homehost +specified, and that +.B homehost +matches that which is given in +.B mdadm.conf +or on the command line. +If +.I mdadm +is not able to positively identify the array as belonging to the +current host, the device will be rejected. + +.IP + +.I mdadm +keeps a list of arrays that it has partly assembled in +.B /var/run/mdadm/map +(or +.B /var/run/mdadm.map +if the directory doesn't exist). If no array exists which matches +the metadata on the new device, +.I mdadm +must choose a device name and unit number. It does this based on any +name given in +.B mdadm.conf +or any name information stored in the metadata. If this name +suggests a unit number, that number will be used, otherwise a free +unit number will be chosen. Normally +.I mdadm +will prefer to create a partitionable array, however if the +.B CREATE +line in +.B mdadm.conf +suggests that a non-partitionable array is preferred, that will be +honoured. + +.IP + +Once an appropriate array is found or created and the device is added, +.I mdadm +must decide if the array is ready to be started.
It will +normally compare the number of available (non-spare) devices to the +number of devices that the metadata suggests need to be active. If +there are at least that many, the array will be started. This means +that if any devices are missing the array will not be started. + +As an alternative, +.B --run +may be passed to +.B mdadm +in which case the array will be run as soon as there are enough +devices present for the data to be accessible. For a raid1, that +means one device will start the array. For a clean raid5, the array +will be started as soon as all but one drive is present. + +Note that neither of these approaches is really ideal. If it can +be known that all device discovery has completed, then +.br +.B " mdadm -IRs" +.br +can be run which will try to start all arrays that are being +incrementally assembled. They are started in "read-auto" mode in +which they are read-only until the first write request. This means +that no metadata updates are made and no attempt at resync or recovery +happens. Further devices that are found before the first write can +still be added safely. + .SH EXAMPLES .B " mdadm --query /dev/name-of-device" @@ -1755,6 +1943,16 @@ the background in monitor mode monitoring all md devices. Also write pid of mdadm daemon to .BR /var/run/mdadm . +.B " mdadm -Iq /dev/somedevice" +.br +Try to incorporate newly discovered device into some array as +appropriate. + +.B " mdadm --incremental --rebuild --run --scan" +.br +Rebuild the array map from any current arrays, and then start any that +can be started. + .B " mdadm --create --help" .br Provide help about the Create mode. @@ -1792,6 +1990,16 @@ they contain MD super block, and gives identifying information .BR mdadm.conf (5) for more details. +.SS /var/run/mdadm/map +When +.I --incremental +mode is used, this file gets a list of arrays currently being created. +If +.B /var/run/mdadm +does not exist as a directory, then +.B /var/run/mdadm.map +is used instead.
+ .SH DEVICE NAMES While entries in the /dev directory can have any format you like, diff --git a/mdadm.c b/mdadm.c index b5dce73a..9bec2958 100644 --- a/mdadm.c +++ b/mdadm.c @@ -101,6 +101,7 @@ int main(int argc, char *argv[]) int re_add = 0; char *shortopt = short_options; int dosyslog = 0; + int rebuild_map = 0; int auto_update_home = 0; int copies; @@ -191,6 +192,7 @@ int main(int argc, char *argv[]) case 'C': newmode = CREATE; shortopt = short_bitmap_auto_options; break; case 'F': newmode = MONITOR;break; case 'G': newmode = GROW; shortopt = short_bitmap_auto_options; break; + case 'I': newmode = INCREMENTAL; break; case '#': case 'D': @@ -269,6 +271,7 @@ int main(int argc, char *argv[]) case 'C': case 'F': case 'G': + case 'I': continue; } if (opt == 1) { @@ -321,6 +324,7 @@ int main(int argc, char *argv[]) case O(ASSEMBLE,AutoHomeHost): auto_update_home = 1; continue; + case O(INCREMENTAL, 'e'): case O(CREATE,'e'): case O(ASSEMBLE,'e'): case O(MISC,'e'): /* set metadata (superblock) information */ @@ -628,6 +632,7 @@ int main(int argc, char *argv[]) case O(ASSEMBLE,'s'): /* scan */ case O(MISC,'s'): case O(MONITOR,'s'): + case O(INCREMENTAL,'s'): scan = 1; continue; @@ -702,6 +707,7 @@ int main(int argc, char *argv[]) case O(MANAGE,'f'): /* set faulty */ devmode = 'f'; continue; + case O(INCREMENTAL,'R'): case O(MANAGE,'R'): case O(ASSEMBLE,'R'): case O(BUILD,'R'): @@ -833,6 +839,10 @@ int main(int argc, char *argv[]) } } continue; + + case O(INCREMENTAL, 'r'): + rebuild_map = 1; + continue; } /* We have now processed all the valid options. 
Anything else is * an error @@ -861,6 +871,7 @@ int main(int argc, char *argv[]) case MISC : help_text = Help_misc; break; case MONITOR : help_text = Help_monitor; break; case GROW : help_text = Help_grow; break; + case INCREMENTAL:help_text= Help_incr; break; } fputs(help_text,stderr); exit(0); @@ -1289,6 +1300,34 @@ int main(int argc, char *argv[]) } else fprintf(stderr, Name ": no changes to --grow\n"); break; + case INCREMENTAL: + if (rebuild_map) { + RebuildMap(); + } + if (scan) { + if (runstop <= 0) { + fprintf(stderr, Name + ": --incremental --scan meaningless without --run.\n"); + break; + } + rv = IncrementalScan(verbose); + } + if (!devlist) { + if (!rebuild_map && !scan) { + fprintf(stderr, Name + ": --incremental requires a device.\n"); + rv = 1; + } + break; + } + if (devlist->next) { + fprintf(stderr, Name + ": --incremental can only handle one device.\n"); + rv = 1; + break; + } + rv = Incremental(devlist->devname, verbose-quiet, runstop, + ss, homehost, autof); } exit(rv); } diff --git a/mdadm.h b/mdadm.h index d40d1873..3831f42b 100644 --- a/mdadm.h +++ b/mdadm.h @@ -146,6 +146,7 @@ enum mode { MISC, MONITOR, GROW, + INCREMENTAL, }; extern char short_options[]; @@ -153,6 +154,7 @@ extern char short_bitmap_auto_options[]; extern struct option long_options[]; extern char Version[], Usage[], Help[], OptionHelp[], Help_create[], Help_build[], Help_assemble[], Help_grow[], + Help_incr[], Help_manage[], Help_misc[], Help_monitor[], Help_config[]; /* for option that don't have short equivilents, we assign arbitrary @@ -238,6 +240,24 @@ struct mdstat_ent { extern struct mdstat_ent *mdstat_read(int hold, int start); extern void free_mdstat(struct mdstat_ent *ms); extern void mdstat_wait(int seconds); +extern int mddev_busy(int devnum); + +struct map_ent { + struct map_ent *next; + int devnum; + int major,minor; + int uuid[4]; + char *path; +}; +extern int map_update(struct map_ent **mpp, int devnum, int major, int minor, + int uuid[4], char *path); +extern 
struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]); +extern void map_read(struct map_ent **melp); +extern int map_write(struct map_ent *mel); +extern void map_delete(struct map_ent **mapp, int devnum); +extern void map_free(struct map_ent *map); +extern void map_add(struct map_ent **melp, + int devnum, int major, int minor, int uuid[4], char *path); /* Data structure for holding info read from sysfs */ struct sysdev { @@ -259,6 +279,7 @@ struct sysarray { int spares; int cache_size; int mismatch_cnt; + int major_version, minor_version; }; /* various details can be requested */ #define GET_LEVEL 1 @@ -267,6 +288,7 @@ struct sysarray { #define GET_CHUNK 8 #define GET_CACHE 16 #define GET_MISMATCH 32 +#define GET_VERSION 64 #define GET_DEVS 1024 /* gets role, major, minor */ #define GET_OFFSET 2048 @@ -277,6 +299,7 @@ struct sysarray { /* If fd >= 0, get the array it is open on, * else use devnum. >=0 -> major9. <0..... */ +extern void sysfs_free(struct sysarray *sra); extern struct sysarray *sysfs_read(int fd, int devnum, unsigned long options); extern int sysfs_set_str(struct sysarray *sra, struct sysdev *dev, char *name, char *val); @@ -345,6 +368,8 @@ struct supertype { extern struct supertype *super_by_version(int vers, int minor); extern struct supertype *guess_super(int fd); extern int get_dev_size(int fd, char *dname, unsigned long long *sizep); +extern void get_one_disk(int mdfd, mdu_array_info_t *ainf, + mdu_disk_info_t *disk); #if __GNUC__ < 3 struct stat64; @@ -426,6 +451,11 @@ extern int Monitor(mddev_dev_t devlist, extern int Kill(char *dev, int force, int quiet); extern int Wait(char *dev); +extern int Incremental(char *devname, int verbose, int runstop, + struct supertype *st, char *homehost, int autof); +extern void RebuildMap(void); +extern int IncrementalScan(int verbose); + extern int CreateBitmap(char *filename, int force, char uuid[16], unsigned long chunksize, unsigned long daemon_sleep, unsigned long write_behind, @@ -448,6 +478,7 
@@ extern int is_standard(char *dev, int *nump); extern int parse_auto(char *str, char *msg, int config); extern mddev_ident_t conf_get_ident(char *dev); extern mddev_dev_t conf_get_devs(void); +extern int conf_test_dev(char *devname); extern struct createinfo *conf_get_create_info(void); extern void set_conffile(char *file); extern char *conf_get_mailaddr(void); @@ -479,6 +510,8 @@ extern char *get_md_name(int dev); extern char DefaultConfFile[]; extern int open_mddev(char *dev, int autof); +extern int open_mddev_devnum(char *devname, int devnum, char *name, + char *chosen_name); #define LEVEL_MULTIPATH (-4) diff --git a/mdopen.c b/mdopen.c index 9f3dfb84..0b6951d2 100644 --- a/mdopen.c +++ b/mdopen.c @@ -292,3 +292,51 @@ int open_mddev(char *dev, int autof) return mdfd; } + +int open_mddev_devnum(char *devname, int devnum, char *name, char *chosen_name) +{ + /* Open the md device with number 'devnum', possibly using 'devname', + * possibly constructing a name with 'name', but in any case, copying + * the name into 'chosen_name' + */ + int major, minor; + struct stat stb; + + if (devname) + strcpy(chosen_name, devname); + else if (name && strchr(name,'/') == NULL) { + char *n = strchr(name, ':'); + if (n) n++; else n = name; + if (isdigit(*n) && devnum < 0) + sprintf(chosen_name, "/dev/md/d%s", n); + else + sprintf(chosen_name, "/dev/md/%s", n); + } else { + if (devnum >= 0) + sprintf(chosen_name, "/dev/md%d", devnum); + else + sprintf(chosen_name, "/dev/md/d%d", -1-devnum); + } + if (devnum >= 0) { + major = MD_MAJOR; + minor = devnum; + } else { + major = get_mdp_major(); + minor = (-1-devnum) << 6; + } + if (stat(chosen_name, &stb) == 0) { + /* It already exists. Check it is right. */ + if ( ! S_ISBLK(stb.st_mode) || + stb.st_rdev != makedev(major, minor)) { + errno = EEXIST; + return -1; + } + } else { + if (mknod(chosen_name, S_IFBLK | 0600, + makedev(major, minor)) != 0) { + return -1; + } + /* FIXME chown/chmod ?? 
*/ + } + return open(chosen_name, O_RDWR); +} diff --git a/mdstat.c b/mdstat.c index 5eeac6cc..de31acbf 100644 --- a/mdstat.c +++ b/mdstat.c @@ -251,3 +251,15 @@ void mdstat_wait(int seconds) tm.tv_usec = 0; select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm); } + +int mddev_busy(int devnum) +{ + struct mdstat_ent *mdstat = mdstat_read(0, 0); + struct mdstat_ent *me; + + for (me = mdstat ; me ; me = me->next) + if (me->devnum == devnum) + break; + free_mdstat(mdstat); + return me != NULL; +} diff --git a/super0.c b/super0.c index 757d9056..8f328436 100644 --- a/super0.c +++ b/super0.c @@ -110,6 +110,9 @@ static void examine_super0(void *sbv, char *homehost) } else printf(" UUID : %08x\n", sb->set_uuid0); + if (sb->not_persistent) + printf(" Eedk : not persistent\n"); + atime = sb->ctime; printf(" Creation Time : %.24s\n", ctime(&atime)); c=map_num(pers, sb->level); diff --git a/sysfs.c b/sysfs.c index 25ede6b7..16744f1b 100644 --- a/sysfs.c +++ b/sysfs.c @@ -42,6 +42,18 @@ int load_sys(char *path, char *buf) return 0; } +void sysfs_free(struct sysarray *sra) +{ + if (!sra) + return; + while (sra->devs) { + struct sysdev *d = sra->devs; + sra->devs = d->next; + free(d); + } + free(sra); +} + struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) { /* Longest possible name in sysfs, mounted at /sys, is @@ -81,6 +93,16 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) base = fname + strlen(fname); sra->devs = NULL; + if (options & GET_VERSION) { + strcpy(base, "metadata_version"); + if (load_sys(fname, buf)) + goto abort; + if (strncmp(buf, "none", 4) == 0) + sra->major_version = sra->minor_version = -1; + else + sscanf(buf, "%d.%d", + &sra->major_version, &sra->minor_version); + } if (options & GET_LEVEL) { strcpy(base, "level"); if (load_sys(fname, buf)) @@ -144,6 +166,7 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) goto abort; dev->next = sra->devs; sra->devs = dev; + strcpy(dev->name, 
de->d_name); /* Always get slot, major, minor */ strcpy(dbase, "slot"); @@ -191,12 +214,7 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) return sra; abort: - while (sra && sra->devs) { - dev = sra->devs; - sra->devs = dev->next; - free(dev); - } - if(sra) free(sra); + sysfs_free(sra); return NULL; } diff --git a/util.c b/util.c index c21bf514..58449d52 100644 --- a/util.c +++ b/util.c @@ -815,6 +815,14 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep) return 1; } +void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk) +{ + int d; + ioctl(mdfd, GET_ARRAY_INFO, ainf); + for (d = 0 ; d < ainf->raid_disks + ainf->nr_disks ; d++) + if (ioctl(mdfd, GET_DISK_INFO, disk) == 0) + return; +} #ifdef __TINYC__ /* tinyc doesn't optimize this check in ioctl.h out ... */ unsigned int __invalid_size_argument_for_IOC = 0; -- 2.39.2