From e86c9dd6d847ec57ec400b118efaf2c1808f10bc Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 13 Mar 2006 05:51:32 +0000 Subject: [PATCH] Initial reshape support Needs work for other levels etc. Signed-off-by: Neil Brown --- Grow.c | 408 ++++++++++++++++++++++++++++++++++++++++++- Makefile | 9 +- mdadm.c | 3 +- mdadm.h | 56 ++++++ restripe.c | 324 ++++++++++++++++++++++++++++++++++ sysfs.c | 265 ++++++++++++++++++++++++++++ tests/07testreshape5 | 38 ++++ util.c | 28 --- 8 files changed, 1098 insertions(+), 33 deletions(-) create mode 100644 restripe.c create mode 100644 sysfs.c create mode 100644 tests/07testreshape5 diff --git a/Grow.c b/Grow.c index 9e8e2172..ece2bda3 100644 --- a/Grow.c +++ b/Grow.c @@ -306,7 +306,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int continue; if (st->ss->load_super(st, fd2, &super, NULL)==0) { if (st->ss->add_internal_bitmap(st, super, - chunk, delay, write_behind, + chunk, delay, write_behind, bitmapsize, 0, major)) st->ss->write_bitmap(st, fd2, super); else { @@ -378,4 +378,410 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int return 0; } + +/* + * When reshaping an array we might need to backup some data. + * This is written to all spares with a 'super_block' describing it. + * The superblock goes 1K from the end of the used space on the + * device. + * It is written after the backup is complete. + * It has the following structure. + */ + +struct mdp_backup_super { + char magic[16]; /* md_backup_data-1 */ + __u8 set_uuid[16]; + __u64 mtime; + /* start/sizes in 512byte sectors */ + __u64 devstart; + __u64 arraystart; + __u64 length; + __u32 sb_csum; /* csum of preceding bytes. 
*/ +}; + +int bsb_csum(char *buf, int len) +{ + int i; + int csum = 0; + for (i=0; i= 0) { + fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n", + devname); + return 1; + } + if (level != UnSet && level != LEVEL_FAULTY) { + fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n", + devname); + return 1; + } + if (chunksize || raid_disks) { + fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n", + devname); + return 1; + } + if (layout == UnSet) + return 0; /* nothing to do.... */ + + array.layout = layout; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set layout for %s: %s\n", + devname, strerror(errno)); + return 1; + } + if (!quiet) + printf("layout for %s set to %d\n", devname, array.layout); + return 0; + + case 1: /* raid_disks and size can each be changed. They are independant */ + + if (level != UnSet && level != 1) { + fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n", + devname); + return 1; + } + if (chunksize || layout != UnSet) { + fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n", + devname); + return 1; + } + + /* Each can trigger a resync/recovery which will block the + * other from happening. Later we could block + * resync for the duration via 'sync_action'... + */ + if (raid_disks >= 0) + array.raid_disks = raid_disks; + if (size >= 0) + array.size = size; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + return 1; + } + return 0; + + case 4: + case 5: + case 6: + st = super_by_version(array.major_version, + array.minor_version); + /* size can be changed independantly. + * layout/chunksize/raid_disks/level can be changed + * though the kernel may not support it all. + * If 'suspend_lo' is not present in devfs, then + * these cannot be changed. 
+ */ + if (size >= 0) { + /* Cannot change other details as well.. */ + if (layout != UnSet || + chunksize != 0 || + raid_disks != 0 || + level != UnSet) { + fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n", + devname, c); + return 1; + } + array.size = size; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + return 1; + } + return 0; + } + /* Ok, just change the shape. This can be awkward. + * There are three possibilities. + * 1/ The array will shrink. We don't support this + * possibility. Maybe one day... + * 2/ The array will not change size. This is easy enough + * to do, but not reliably. If the process is aborted + * the array *will* be corrupted. So maybe we can allow + * this but only if the user is really certain. e.g. + * --really-risk-everything + * 3/ The array will grow. This can be reliably achieved. + * However the kernel's restripe routines will cheerfully + * overwrite some early data before it is safe. So we + * need to make a backup of the early parts of the array + * and be ready to restore it if rebuild aborts very early. + * + * We backup data by writing it to all spares (there must be + * at least 1, so even raid6->raid5 requires a spare to be + * present). + * + * So: we enumerate the devices in the array and + * make sure we can open all of them. + * Then we freeze the early part of the array and + * backup to the various spares. + * Then we request changes and start the reshape. + * Monitor progress until it has passed the danger zone. + * and finally invalidate the copied data and unfreeze the + * start of the array. + * + * Before we can do this we need to decide: + * - will the array grow? Just calculate size + * - how much needs to be saved: count stripes. + * - where to save data... good question. 
+ * + */ + nlevel = olevel = array.level; + nchunk = ochunk = array.chunk_size; + nlayout = olayout = array.layout; + ndisks = odisks = array.raid_disks; + + if (level != UnSet) nlevel = level; + if (chunksize) nchunk = chunksize; + if (layout != UnSet) nlayout = layout; + if (raid_disks) ndisks = raid_disks; + + odata = odisks-1; + if (olevel == 6) odata--; /* number of data disks */ + ndata = ndisks-1; + if (nlevel == 6) ndata--; + + if (ndata < odata) { + fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n", + devname); + return 1; + } + if (ndata == odata) { + fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n", + devname); + return 1; + } + /* Well, it is growing... so how much do we need to backup. + * Need to backup a full number of new-stripes, such that the + * last one does not over-write any place that it would be read + * from + */ + nstripe = ostripe = 0; + while (nstripe+ochunk/512 >= ostripe) { + nstripe += nchunk/512; + last_block = nstripe * ndata; + ostripe = last_block / odata; + } + printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2); + + sra = sysfs_read(fd, 0, + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE); + if (!sra) { + fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", + devname); + return 1; + } + + if (last_block >= sra->component_size/2) { + fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", + devname); + return 1; + } + + nrdisks = array.nr_disks + sra->spares; + /* Now we need to open all these devices so we can read/write. 
+ */ + fdlist = malloc(nrdisks * sizeof(int)); + offsets = malloc(nrdisks * sizeof(offsets[0])); + if (!fdlist || !offsets) { + fprintf(stderr, Name ": malloc failed: grow aborted\n"); + return 1; + } + for (d=0; d< nrdisks; d++) + fdlist[d] = -1; + d = array.raid_disks; + for (sd = sra->devs; sd; sd=sd->next) { + if (sd->state & (1<state & (1<major, sd->minor); + fdlist[sd->role] = open(dn, O_RDONLY); + offsets[sd->role] = sd->offset; + if (fdlist[sd->role] < 0) { + fprintf(stderr, Name ": %s: cannot open component %s\n", + devname, dn); + goto abort; + } + } else { + /* spare */ + char *dn = map_dev(sd->major, sd->minor); + fdlist[d] = open(dn, O_RDWR); + offsets[d] = sd->offset; + if (fdlist[d]<0) { + fprintf(stderr, Name ": %s: cannot open component %s\n", + devname, dn); + goto abort; + } + d++; + } + } + for (i=0 ; iss->load_super(st, fdlist[0], &super, NULL)) { + fprintf(stderr, Name ": %s: Cannot find a superblock\n", + devname); + goto abort; + } + + spares = sra->spares; + + /* Decide offset for the backup and llseek the spares */ + for (i=array.raid_disks; icomponent_size - last_block - 8; + if (lseek64(fdlist[i], offsets[i]<<9, 0) != offsets[i]<<9) { + fprintf(stderr, Name ": could not seek...\n"); + goto abort; + } + } + array.level = nlevel; + array.raid_disks = ndisks; + array.chunk_size = nchunk; + array.layout = nlayout; + if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { + fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", + devname, strerror(errno)); + goto abort; + } + + /* suspend the relevant region */ + sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */ + if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 || + sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) { + fprintf(stderr, Name ": %s: failed to suspend device.\n", + devname); + goto abort_resume; + } + + + err = save_stripes(fdlist, offsets, + odisks, ochunk, olevel, olayout, + spares, fdlist+odisks, + 0ULL, nstripe*512); + + /* abort if there was an error 
*/ + if (err < 0) { + fprintf(stderr, Name ": %s: failed to save critical region\n", + devname); + goto abort_resume; + } + /* FIXME write superblocks */ + memcpy(bsb.magic, "md_backups_data-1", 16); + st->ss->uuid_from_super((int*)&bsb.set_uuid, super); + bsb.mtime = time(0); + bsb.arraystart = 0; + bsb.length = last_block; + for (i=odisks; i= nstripe) + break; + sleep(1); + } + /* invalidate superblocks */ + memset(&bsb, 0, sizeof(bsb)); + for (i=odisks; i= 0) + close(fdlist[i]); + free(fdlist); + free(offsets); + + break; + } + return 0; + + + abort_resume: + sysfs_set_num(sra, NULL, "suspend_lo", last_block); + abort: + for (i=0; i= 0) + close(fdlist[i]); + free(fdlist); + free(offsets); + return 1; + +} diff --git a/Makefile b/Makefile index c310fca7..3e18f9d0 100644 --- a/Makefile +++ b/Makefile @@ -59,10 +59,10 @@ MAN8DIR = $(MANDIR)/man8 OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ - mdopen.o super0.o super1.o bitmap.o + mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \ Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \ - mdopen.c super0.c super1.c bitmap.c + mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c ASSEMBLE_FLAGS:= -DMDASSEMBLE @@ -73,7 +73,7 @@ endif all : mdadm mdadm.man md.man mdadm.conf.man -everything: all mdadm.static mdadm.uclibc swap_super mdassemble mdassemble.uclibc mdassemble.static mdassemble.man +everything: all mdadm.static mdadm.uclibc swap_super test_stripe mdassemble mdassemble.uclibc mdassemble.static mdassemble.man # mdadm.tcc doesn't work.. 
mdadm : $(OBJS) @@ -92,6 +92,9 @@ mdadm.klibc : $(SRCS) mdadm.h rm -f $(OBJS) gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS) +test_stripe : restripe.c mdadm.h + $(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c + mdassemble : $(ASSEMBLE_SRCS) mdadm.h rm -f $(OBJS) $(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) diff --git a/mdadm.c b/mdadm.c index 1c9388c2..53479996 100644 --- a/mdadm.c +++ b/mdadm.c @@ -1155,7 +1155,8 @@ int main(int argc, char *argv[]) } else if (layout != UnSet) rv = Manage_reconfig(devlist->devname, mdfd, layout); else if (size >= 0 || raiddisks) - rv = Manage_resize(devlist->devname, mdfd, size, raiddisks); + rv = Grow_reshape(devlist->devname, mdfd, quiet, + size, level, layout, chunk, raiddisks); else if (bitmap_file) { if (delay == 0) delay = DEFAULT_BITMAP_DELAY; rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file, diff --git a/mdadm.h b/mdadm.h index 00abefe0..61d0469c 100644 --- a/mdadm.h +++ b/mdadm.h @@ -178,6 +178,54 @@ extern struct mdstat_ent *mdstat_read(int hold, int start); extern void free_mdstat(struct mdstat_ent *ms); extern void mdstat_wait(int seconds); +/* Data structure for holding info read from sysfs */ +struct sysdev { + char name[20]; + int role; + int major, minor; + unsigned long long offset, size; + int state; + int errors; + struct sysdev *next; +}; +struct sysarray { + char name[20]; + struct sysdev *devs; + int chunk; + unsigned long long component_size; + int layout; + int level; + int spares; +}; +/* various details can be requested */ +#define GET_LEVEL 1 +#define GET_LAYOUT 2 +#define GET_COMPONENT 4 +#define GET_CHUNK 8 + +#define GET_DEVS 1024 /* gets role, major, minor */ +#define GET_OFFSET 2048 +#define GET_SIZE 4096 +#define GET_STATE 8192 +#define GET_ERROR 16384 + +/* If fd >= 0, get the array it is open on, + * else use devnum. >=0 -> major9. <0..... 
+ */ +extern struct sysarray *sysfs_read(int fd, int devnum, unsigned long options); +extern int sysfs_set_str(struct sysarray *sra, struct sysdev *dev, + char *name, char *val); +extern int sysfs_set_num(struct sysarray *sra, struct sysdev *dev, + char *name, unsigned long long val); +extern int sysfs_get_ll(struct sysarray *sra, struct sysdev *dev, + char *name, unsigned long long *val); + + +extern int save_stripes(int *source, unsigned long long *offsets, + int raid_disks, int chunk_size, int level, int layout, + int nwrites, int *dest, + unsigned long long start, unsigned long long length); + #ifndef Sendmail #define Sendmail "/usr/lib/sendmail -t" #endif @@ -251,6 +299,9 @@ extern int Manage_subdevs(char *devname, int fd, mddev_dev_t devlist, int verbose); extern int Grow_Add_device(char *devname, int fd, char *newdev); extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force); +extern int Grow_reshape(char *devname, int fd, int quiet, + long long size, + int level, int layout, int chunksize, int raid_disks); extern int Assemble(struct supertype *st, char *mddev, int mdfd, @@ -367,3 +418,8 @@ extern int open_mddev(char *dev, int autof); #define makedev(M,m) (((M)<<8) | (m)) #endif +/* for raid5 */ +#define ALGORITHM_LEFT_ASYMMETRIC 0 +#define ALGORITHM_RIGHT_ASYMMETRIC 1 +#define ALGORITHM_LEFT_SYMMETRIC 2 +#define ALGORITHM_RIGHT_SYMMETRIC 3 diff --git a/restripe.c b/restripe.c new file mode 100644 index 00000000..94a0e3d8 --- /dev/null +++ b/restripe.c @@ -0,0 +1,324 @@ +/* + * mdadm - manage Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2006 Neil Brown + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: + */ + +#include "mdadm.h" + +/* To restripe, we read from old geometry to a buffer, and + * write from buffer to new geometry. + * When reading we don't worry about parity. When writing we do. + * + */ + +static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) +{ + /* On the given stripe, find which disk in the array will have + * block numbered 'block'. + */ + int pd; + + switch(level*100 + layout) { + case 000: + case 400: + /* raid 4 isn't messed around by parity blocks */ + if (block == -1) + return raid_disks-1; /* parity block */ + return block; + case 500 + ALGORITHM_LEFT_ASYMMETRIC: + pd = (raid_disks-1) - stripe % raid_disks; + if (block == -1) return pd; + if (block >= pd) + block++; + return block; + + case 500 + ALGORITHM_RIGHT_ASYMMETRIC: + pd = stripe % raid_disks; + if (block == -1) return pd; + if (block >= pd) + block++; + return block; + + case 500 + ALGORITHM_LEFT_SYMMETRIC: + pd = (raid_disks - 1) - stripe % raid_disks; + if (block == -1) return pd; + return (pd + 1 + block) % raid_disks; + + case 500 + ALGORITHM_RIGHT_SYMMETRIC: + pd = stripe % raid_disks; + if (block == -1) return pd; + return (pd + 1 + block) % raid_disks; + + case 600 + ALGORITHM_LEFT_ASYMMETRIC: + pd = raid_disks - 1 - (stripe % raid_disks); + if (block == -1) return pd; + if (pd == raid_disks - 1) + return block+1; + if (block >= pd) + return block+2; + return block; + + case 600 + ALGORITHM_RIGHT_ASYMMETRIC: + pd = stripe % raid_disks; + 
if (block == -1) return pd; + if (pd == raid_disks - 1) + return block+1; + if (block >= pd) + return block+2; + return block; + + case 600 + ALGORITHM_LEFT_SYMMETRIC: + pd = raid_disks - 1 - (stripe % raid_disks); + if (block == -1) return pd; + return (pd + 2 + block) % raid_disks; + + case 600 + ALGORITHM_RIGHT_SYMMETRIC: + pd = stripe % raid_disks; + if (block == -1) return pd; + return (pd + 2 + block) % raid_disks; + } + return -1; +} + + +static void xor_blocks(char *target, char **sources, int disks, int size) +{ + int i, j; + /* Amazingly inefficient... */ + for (i=0; i 0) { + unsigned long long offset; + int i; + len = chunk_size - cpos; + if (len > sizeof(buf)) len = sizeof(buf); + if (len > length) len = length; + /* len bytes to be moved from one device */ + + offset = (start/chunk_size/data_disks)*chunk_size + cpos; + disk = start/chunk_size % data_disks; + disk = geo_map(disk, start/chunk_size/data_disks, + raid_disks, level, layout); + if (lseek64(source[disk], offsets[disk]+offset, 0) < 0) + return -1; + if (read(source[disk], buf, len) != len) + return -1; + for (i=0; i= chunk_size) cpos -= chunk_size; + } + return 0; +} + +/* Restore data: + * We are given: + * A list of 'fds' of the active disks. Some may be '-1' for not-available. + * A geometry: raid_disks, chunk_sisze, level, layout + * An 'fd' to read from. It is already seeked to the right (Read) location. + * A start and length. + * The length must be a multiple of the stripe size. + * + * We build a full stripe in memory and then write it out. + * We assume that there are enough working devices. 
+ */ +int restore_stripes(int *dest, unsigned long long *offsets, + int raid_disks, int chunk_size, int level, int layout, + int source, + unsigned long long start, unsigned long long length) +{ + char *stripe_buf = malloc(raid_disks * chunk_size); + char **stripes = malloc(raid_disks * sizeof(char*)); + char **blocks = malloc(raid_disks * sizeof(char*)); + int i; + + int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2); + + if (stripe_buf == NULL || stripes == NULL || blocks == NULL) { + free(stripe_buf); + free(stripes); + free(blocks); + return -2; + } + for (i=0; i 0) { + int len = data_disks * chunk_size; + unsigned long long offset; + if (length < len) + return -3; + for (i=0; i < data_disks; i++) { + int disk = geo_map(i, start/chunk_size/data_disks, + raid_disks, level, layout); + blocks[i] = stripes[disk]; + if (read(source, stripes[disk], chunk_size) != chunk_size) + return -1; + } + /* We have the data, now do the parity */ + offset = (start/chunk_size/data_disks) * chunk_size; + if (level >= 4) { + int disk = geo_map(-1, start/chunk_size/data_disks, + raid_disks, level, layout); + xor_blocks(stripes[disk], blocks, data_disks, chunk_size); + /* FIXME need to do raid6 Q as well */ + } + for (i=0; i < raid_disks ; i++) + if (dest[i] >= 0) { + if (lseek64(dest[i], offsets[i]+offset, 0) < 0) + return -1; + if (write(dest[i], stripes[i], chunk_size) != chunk_size) + return -1; + } + length -= len; + start += len; + } + return 0; +} + +#ifdef MAIN + +unsigned long long getnum(char *str, char **err) +{ + char *e; + unsigned long long rv = strtoull(str, &e, 10); + if (e==str || *e) { + *err = str; + return 0; + } + return rv; +} + +main(int argc, char *argv[]) +{ + /* save/restore file raid_disks chunk_size level layout start length devices... 
+ */ + int save; + int *fds; + char *file; + int storefd; + unsigned long long *offsets; + int raid_disks, chunk_size, level, layout; + unsigned long long start, length; + int i; + + char *err = NULL; + if (argc < 10) { + fprintf(stderr, "Usage: test_stripe save/restore file raid_disks" + " chunk_size level layout start length devices...\n"); + exit(1); + } + if (strcmp(argv[1], "save")==0) + save = 1; + else if (strcmp(argv[1], "restore") == 0) + save = 0; + else { + fprintf(stderr, "test_stripe: must give 'save' or 'restore'.\n"); + exit(2); + } + + file = argv[2]; + raid_disks = getnum(argv[3], &err); + chunk_size = getnum(argv[4], &err); + level = getnum(argv[5], &err); + layout = getnum(argv[6], &err); + start = getnum(argv[7], &err); + length = getnum(argv[8], &err); + if (err) { + fprintf(stderr, "test_stripe: Bad number: %s\n", err); + exit(2); + } + if (argc != raid_disks + 9) { + fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n", + raid_disks, argc-9); + exit(2); + } + fds = malloc(raid_disks * sizeof(*fds)); + offsets = malloc(raid_disks * sizeof(*offsets)); + memset(offsets, 0, raid_disks * sizeof(*offsets)); + + storefd = open(file, O_RDWR); + if (storefd < 0) { + perror(file); + fprintf(stderr, "test_stripe: could not open %s.\n", file); + exit(3); + } + for (i=0; i + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: + */ + +#include "mdadm.h" +#include + +int load_sys(char *path, char *buf) +{ + int fd = open(path, O_RDONLY); + int n; + if (fd < 0) + return -1; + n = read(fd, buf, 1024); + close(fd); + if (n <=0 || n >= 1024) + return -1; + buf[n] = 0; + if (buf[n-1] == '\n') + buf[n-1] = 0; + return 0; +} + +struct sysarray *sysfs_read(int fd, int devnum, unsigned long options) +{ + /* Longest possible name in sysfs, mounted at /sys, is + * /sys/block/md_dXXX/md/dev-XXXXX/block/dev + * /sys/block/md_dXXX/md/metadata_version + * which is about 41 characters. 50 should do for now + */ + char fname[50]; + char buf[1024]; + char *base; + char *dbase; + struct sysarray *sra; + struct sysdev *dev; + DIR *dir; + struct dirent *de; + + sra = malloc(sizeof(*sra)); + if (sra == NULL) + return sra; + + if (fd >= 0) { + struct stat stb; + if (fstat(fd, &stb)) return NULL; + if (major(stb.st_rdev)==9) + sprintf(sra->name, "md%d", minor(stb.st_rdev)); + else + sprintf(sra->name, "md_d%d", + minor(stb.st_rdev)/16); + } else { + if (devnum >= 0) + sprintf(sra->name, "md%d", devnum); + else + sprintf(sra->name, "md_d%d", + -1-devnum); + } + sprintf(fname, "/sys/block/%s/md/", sra->name); + base = fname + strlen(fname); + + sra->devs = NULL; + if (options & GET_LEVEL) { + strcpy(base, "level"); + if (load_sys(fname, buf)) + goto abort; + sra->level = map_name(pers, buf); + } + if (options & GET_LAYOUT) { + strcpy(base, "layout"); + if (load_sys(fname, buf)) + goto abort; + sra->layout = strtoul(buf, NULL, 0); + } + if (options & GET_COMPONENT) { + strcpy(base, "component_size"); + if (load_sys(fname, buf)) + goto abort; + sra->component_size = strtoull(buf, NULL, 0); + } + if (options & GET_CHUNK) { + strcpy(base, "chunk_size"); + if 
(load_sys(fname, buf)) + goto abort; + sra->chunk = strtoul(buf, NULL, 0); + } + + if (! (options & GET_DEVS)) + return sra; + + /* Get all the devices as well */ + *base = 0; + dir = opendir(fname); + if (!dir) + goto abort; + sra->spares = 0; + + while ((de = readdir(dir)) != NULL) { + char *ep; + if (de->d_ino == 0 || + strncmp(de->d_name, "dev-", 4) != 0) + continue; + strcpy(base, de->d_name); + dbase = base + strlen(base); + *dbase++ = '/'; + + dev = malloc(sizeof(*dev)); + if (!dev) + goto abort; + dev->next = sra->devs; + sra->devs = dev; + + /* Always get slot, major, minor */ + strcpy(dbase, "slot"); + if (load_sys(fname, buf)) + goto abort; + dev->role = strtoul(buf, &ep, 10); + if (*ep) dev->role = -1; + + strcpy(dbase, "block/dev"); + if (load_sys(fname, buf)) + goto abort; + sscanf(buf, "%d:%d", &dev->major, &dev->minor); + + if (options & GET_OFFSET) { + strcpy(dbase, "offset"); + if (load_sys(fname, buf)) + goto abort; + dev->offset = strtoull(buf, NULL, 0); + } + if (options & GET_SIZE) { + strcpy(dbase, "size"); + if (load_sys(fname, buf)) + goto abort; + dev->size = strtoull(buf, NULL, 0); + } + if (options & GET_STATE) { + dev->state = 0; + strcpy(dbase, "state"); + if (load_sys(fname, buf)) + goto abort; + if (strstr(buf, "in_sync")) + dev->state |= (1<state |= (1<state == 0) + sra->spares++; + } + if (options & GET_ERROR) { + strcpy(buf, "errors"); + if (load_sys(fname, buf)) + goto abort; + dev->errors = strtoul(buf, NULL, 0); + } + } + return sra; + + abort: + while (sra && sra->devs) { + dev = sra->devs; + sra->devs = dev->next; + free(dev); + } + if(sra) free(sra); + return NULL; +} + +unsigned long long get_component_size(int fd) +{ + /* Find out the component size of the array. + * We cannot trust GET_ARRAY_INFO ioctl as it's + * size field is only 32bits. 
+ * So look in /sys/block/mdXXX/md/component_size + */ + struct stat stb; + char fname[50]; + int n; + if (fstat(fd, &stb)) return 0; + if (major(stb.st_rdev) == 9) + sprintf(fname, "/sys/block/md%d/md/component_size", + minor(stb.st_rdev)); + else + sprintf(fname, "/sys/block/md_d%d/md/component_size", + minor(stb.st_rdev)/16); + fd = open(fname, O_RDONLY); + if (fd < 0) + return 0; + n = read(fd, fname, sizeof(fname)); + close(fd); + if (n == sizeof(fname)) + return 0; + fname[n] = 0; + return strtoull(fname, NULL, 10); +} + +int sysfs_set_str(struct sysarray *sra, struct sysdev *dev, + char *name, char *val) +{ + char fname[50]; + int n; + int fd; + sprintf(fname, "/sys/block/%s/md/%s/%s", + sra->name, dev?dev->name:"", name); + fd = open(fname, O_WRONLY); + if (fd < 0) + return -1; + n = write(fd, val, strlen(val)); + close(fd); + if (n != strlen(val)) + return -1; + return 0; +} + +int sysfs_set_num(struct sysarray *sra, struct sysdev *dev, + char *name, unsigned long long val) +{ + char valstr[50]; + sprintf(valstr, "%llu", val); + return sysfs_set_str(sra, dev, name, valstr); +} + +int sysfs_get_ll(struct sysarray *sra, struct sysdev *dev, + char *name, unsigned long long *val) +{ + char fname[50]; + char buf[50]; + int n; + int fd; + char *ep; + sprintf(fname, "/sys/block/%s/md/%s/%s", + sra->name, dev?dev->name:"", name); + fd = open(fname, O_RDONLY); + if (fd < 0) + return -1; + n = read(fd, buf, sizeof(buf)); + close(fd); + if (n <= 0) + return -1; + buf[n] = 0; + *val = strtoull(buf, &ep, 0); + if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) + return -1; + return 0; +} diff --git a/tests/07testreshape5 b/tests/07testreshape5 new file mode 100644 index 00000000..8f56a72f --- /dev/null +++ b/tests/07testreshape5 @@ -0,0 +1,38 @@ + +# +# test the reshape code by using test_reshape and the +# kernel md code to move data into and out of variously +# shaped md arrays. 
+set -x +layouts=(la ra ls rs) +for chunk in 4 8 16 32 64 128 +do + devs="$dev1" + for disks in 2 3 4 5 6 + do + eval devs=\"$devs \$dev$disks\" + for nlayout in 0 1 2 3 + do + layout=${layouts[$nlayout]} + + size=$[chunk*(disks-1)*disks] + + # test restore: make a raid5 from a file, then do a compare + dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$size + $dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs + $mdadm -CR $md0 -amd -l5 -n$disks --assume-clean -c $chunk -p $layout $devs + cmp -s -n $[size*1024] $md0 /tmp/RandFile || { echo cmp failed ; exit 2; } + + # FIXME check parity + + # test save + dd if=/dev/urandom of=$md0 bs=1024 count=$size + > /tmp/NewRand + $dir/test_stripe save /tmp/NewRand $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs + cmp -s -n $[size*1024] $md0 /tmp/NewRand || { echo cmp failed ; exit 2; } + $mdadm -S $md0 + done + done +done +exit 0 + diff --git a/util.c b/util.c index 26254c08..0af43313 100644 --- a/util.c +++ b/util.c @@ -676,34 +676,6 @@ struct supertype *guess_super(int fd) return NULL; } -unsigned long long get_component_size(int fd) -{ - /* Find out the component size of the array. - * We cannot trust GET_ARRAY_INFO ioctl as it's - * size field is only 32bits. - * So look in /sys/block/mdXXX/md/component_size - */ - struct stat stb; - char fname[50]; - int n; - if (fstat(fd, &stb)) return 0; - if (major(stb.st_rdev) == 9) - sprintf(fname, "/sys/block/md%d/md/component_size", - minor(stb.st_rdev)); - else - sprintf(fname, "/sys/block/md_d%d/md/component_size", - minor(stb.st_rdev)/16); - fd = open(fname, O_RDONLY); - if (fd < 0) - return 0; - n = read(fd, fname, sizeof(fname)); - close(fd); - if (n == sizeof(fname)) - return 0; - fname[n] = 0; - return strtoull(fname, NULL, 10); -} - #ifdef __TINYC__ /* tinyc doesn't optimize this check in ioctl.h out ... */ -- 2.39.2