From: NeilBrown Date: Tue, 14 Apr 2009 01:11:14 +0000 (+1000) Subject: Merge branch 'master' of git://github.com/djbw/mdadm into devel-3.0 X-Git-Tag: mdadm-3.0-rc1~28 X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=commitdiff_plain;h=c256924e52249b52bad42963176c42601f1a08f4;hp=7e7fffc4022114c491587755998395ef4766fcc2 Merge branch 'master' of git://github.com/djbw/mdadm into devel-3.0 Conflicts: Grow.c mdadm.h sysfs.c Due to independent fixes for the "mdadm hangs if reshape finishes too quickly" problem. --- diff --git a/.gitignore b/.gitignore index 86e075ed..2503bd8b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ /*-stamp /mdadm /mdadm.udeb +/mdmon +/swap_super +/test_stripe +/TAGS diff --git a/Assemble.c b/Assemble.c index 99f35991..4cf54058 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1069,7 +1069,7 @@ int Assemble(struct supertype *st, char *mddev, fprintf(stderr, "\n"); } sysfs_uevent(content, "change"); - wait_for(chosen_name); + wait_for(chosen_name, mdfd); close(mdfd); return 0; } @@ -1104,8 +1104,8 @@ int Assemble(struct supertype *st, char *mddev, (4 * content->array.chunk_size / 4096) + 1); } } + wait_for(mddev, mdfd); close(mdfd); - wait_for(mddev); if (auto_assem) { int usecs = 1; /* There is a nasty race with 'mdadm --monitor'. @@ -1228,20 +1228,23 @@ int assemble_container_content(struct supertype *st, int mdfd, sysfs_free(sra); for (dev = content->devs; dev; dev = dev->next) - if (sysfs_add_disk(content, dev) == 0) + if (sysfs_add_disk(content, dev, 1) == 0) working++; else if (errno == EEXIST) preexist++; if (working == 0) { close(mdfd); return 1;/* Nothing new, don't try to start */ - } else if (runstop > 0 || + } + + map_update(&map, fd2devnum(mdfd), + content->text_version, + content->uuid, chosen_name); + + if (runstop > 0 || (working + preexist) >= content->array.working_disks) { int err; - map_update(&map, fd2devnum(mdfd), - content->text_version, - content->uuid, chosen_name); switch(content->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: @@ -1276,7 +1279,7 @@ int assemble_container_content(struct supertype *st, int mdfd, fprintf(stderr, "\n"); } if (!err) - wait_for(chosen_name); + wait_for(chosen_name, mdfd); close(mdfd); return 0; /* FIXME should have an O_EXCL and wait for read-auto */ diff --git a/Build.c b/Build.c index 52fc0ca0..358f8e6f 100644 --- a/Build.c +++ b/Build.c @@ -281,8 +281,8 @@ int Build(char *mddev, int chunk, int level, int layout, if (verbose >= 0) fprintf(stderr, Name ": array %s built and started.\n", mddev); + wait_for(mddev, mdfd); close(mdfd); - wait_for(mddev); return 0; abort: diff --git a/Create.c b/Create.c index d33f8914..36df10dd 100644 --- a/Create.c +++ b/Create.c @@ -261,6 +261,8 @@ int Create(struct supertype *st, char *mddev, return 1; } + if (size && chunk) + size &= ~(unsigned long long)(chunk - 1); newsize = size * 2; if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks, chunk, size*2, NULL, &newsize, verbose>=0)) @@ -808,7 +810,7 @@ int Create(struct supertype *st, char *mddev, sysfs_uevent(&info, "change"); if (verbose >= 0) fprintf(stderr, Name ": container %s prepared.\n", mddev); - wait_for(chosen_name); + wait_for(chosen_name, mdfd); } else if (runstop == 1 || subdevs >= raiddisks) { if (st->ss->external) { switch(level) { @@ -844,7 +846,7 @@ int Create(struct supertype *st, char *mddev, ping_monitor(devnum2devname(st->container_dev)); close(container_fd); } - wait_for(chosen_name); + wait_for(chosen_name, mdfd); } else { fprintf(stderr, Name ": not starting array - not enough devices.\n"); } diff --git a/Grow.c b/Grow.c index 7083c184..15cec5f1 100644 --- a/Grow.c +++ b/Grow.c @@ -809,7 +809,6 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, /* wait for reshape to pass the critical region */ while(1) { unsigned long long comp; - char a[20]; if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) { sleep(5); @@ -817,12 +816,14 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, } if (comp >= nstripe) break; - - /* perhaps the entire reshape has completed */ - if (comp == 0 && - sysfs_get_str(sra, NULL, "sync_action", a, sizeof(a)) == 0 && - strncmp(a, "idle", 4) == 0) - break; + if (comp == 0) { + /* Maybe it finished already */ + char action[20]; + if (sysfs_get_str(sra, NULL, "sync_action", + action, 20) > 0 && + strncmp(action, "reshape", 7) != 0) + break; + } sleep(1); } diff --git a/Incremental.c b/Incremental.c index 99fc1bfc..f7e602ec 100644 --- a/Incremental.c +++ b/Incremental.c @@ -335,32 +335,34 @@ int Incremental(char *devname, int verbose, int runstop, sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE)); - sprintf(dn, "%d:%d", sra->devs->disk.major, - sra->devs->disk.minor); - dfd2 = dev_open(dn, O_RDONLY); - st2 = dup_super(st); - if (st2->ss->load_super(st2, dfd2, NULL) || - st->ss->compare_super(st, st2) != 0) { - fprintf(stderr, Name - ": metadata mismatch between %s and " - "chosen array %s\n", - devname, chosen_name); - close(mdfd); + if (sra->devs) { + sprintf(dn, "%d:%d", sra->devs->disk.major, + sra->devs->disk.minor); + dfd2 = dev_open(dn, O_RDONLY); + st2 = dup_super(st); + if (st2->ss->load_super(st2, dfd2, NULL) || + st->ss->compare_super(st, st2) != 0) { + fprintf(stderr, Name + ": metadata mismatch between %s and " + "chosen array %s\n", + devname, chosen_name); + close(mdfd); + close(dfd2); + return 2; + } close(dfd2); - return 2; - } - close(dfd2); - memset(&info2, 0, sizeof(info2)); - st2->ss->getinfo_super(st2, &info2); - st2->ss->free_super(st2); - if (info.array.level != info2.array.level || - memcmp(info.uuid, info2.uuid, 16) != 0 || - info.array.raid_disks != info2.array.raid_disks) { - fprintf(stderr, Name - ": unexpected difference between %s and %s.\n", - chosen_name, devname); - close(mdfd); - return 2; + memset(&info2, 0, sizeof(info2)); + st2->ss->getinfo_super(st2, &info2); + st2->ss->free_super(st2); + if (info.array.level != info2.array.level || + memcmp(info.uuid, info2.uuid, 16) != 0 || + info.array.raid_disks != info2.array.raid_disks) { + fprintf(stderr, Name + ": unexpected difference between %s and %s.\n", + chosen_name, devname); + close(mdfd); + return 2; + } } info2.disk.major = major(stb.st_rdev); info2.disk.minor = minor(stb.st_rdev); @@ -392,18 +394,24 @@ int Incremental(char *devname, int verbose, int runstop, /* 7a/ if not, finish with success. */ if (info.array.level == LEVEL_CONTAINER) { /* Try to assemble within the container */ - close(mdfd); map_unlock(&map); sysfs_uevent(&info, "change"); if (verbose >= 0) fprintf(stderr, Name ": container %s now has %d devices\n", chosen_name, info.array.working_disks); - wait_for(chosen_name); + wait_for(chosen_name, mdfd); + close(mdfd); if (runstop < 0) return 0; /* don't try to assemble */ - return Incremental(chosen_name, verbose, runstop, - NULL, homehost, autof); + rv = Incremental(chosen_name, verbose, runstop, + NULL, homehost, autof); + if (rv == 1) + /* Don't fail the whole -I if a subarray didn't + * have enough devices to start yet + */ + rv = 0; + return rv; } avail = NULL; active_disks = count_active(st, mdfd, &avail, &info); @@ -474,7 +482,7 @@ int Incremental(char *devname, int verbose, int runstop, ": %s attached to %s, which has been started.\n", devname, chosen_name); rv = 0; - wait_for(chosen_name); + wait_for(chosen_name, mdfd); } else { fprintf(stderr, Name ": %s attached to %s, but failed to start: %s.\n", @@ -729,7 +737,6 @@ int Incremental_container(struct supertype *st, char *devname, int verbose, char chosen_name[1024]; struct map_ent *mp; struct mddev_ident_s *match = NULL; - int err; mp = map_by_uuid(&map, ra->uuid); @@ -789,10 +796,8 @@ int Incremental_container(struct supertype *st, char *devname, int verbose, return 2; } - err = assemble_container_content(st, mdfd, ra, runstop, - chosen_name, verbose); - if (err) - return err; + assemble_container_content(st, mdfd, ra, runstop, + chosen_name, verbose); } map_unlock(&map); return 0; diff --git a/Makefile b/Makefile index 94a55d93..63c94542 100644 --- a/Makefile +++ b/Makefile @@ -209,7 +209,7 @@ install-udev: udev-md-raid.rules uninstall: rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 md.4 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm -test: mdadm test_stripe swap_super +test: mdadm mdmon test_stripe swap_super @echo "Please run 'sh ./test' as root" clean : diff --git a/Manage.c b/Manage.c index 7afd89b7..1f38fa54 100644 --- a/Manage.c +++ b/Manage.c @@ -702,7 +702,7 @@ int Manage_subdevs(char *devname, int fd, tst->ss->getinfo_super(tst, &new_mdi); new_mdi.disk.major = disc.major; new_mdi.disk.minor = disc.minor; - if (sysfs_add_disk(sra, &new_mdi) != 0) { + if (sysfs_add_disk(sra, &new_mdi, 0) != 0) { fprintf(stderr, Name ": add new device to external metadata" " failed for %s\n", dv->devname); close(container_fd); diff --git a/config.c b/config.c index 7e09b5ca..24fbfcf9 100644 --- a/config.c +++ b/config.c @@ -563,7 +563,9 @@ void arrayline(char *line) w); } } - if (mis.uuid_set == 0 && mis.devices == NULL && mis.super_minor == UnSet && mis.name[0] == 0) + if (mis.uuid_set == 0 && mis.devices == NULL && + mis.super_minor == UnSet && mis.name[0] == 0 && + (mis.container == NULL && mis.member == NULL)) fprintf(stderr, Name ": ARRAY line %s has no identity information.\n", mis.devname); else { mi = malloc(sizeof(*mi)); diff --git a/managemon.c b/managemon.c index e02c77ea..3835c995 100644 --- a/managemon.c +++ b/managemon.c @@ -395,7 +395,7 @@ static void manage_member(struct mdstat_ent *mdstat, * and open files for each newdev */ for (d = newdev; d ; d = d->next) { struct mdinfo *newd; - if (sysfs_add_disk(&newa->info, d) < 0) + if (sysfs_add_disk(&newa->info, d, 0) < 0) continue; newd = malloc(sizeof(*newd)); *newd = *d; diff --git a/mapfile.c b/mapfile.c index f2762322..01185e6d 100644 --- a/mapfile.c +++ b/mapfile.c @@ -2,7 +2,7 @@ * mapfile - manage /var/run/mdadm.map. Part of: * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006 Neil Brown + * Copyright (C) 2006-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -38,23 +38,48 @@ * UUID - uuid of the array * path - path where device created: /dev/md/home * + * The preferred location for the map file is /var/run/mdadm.map. + * However /var/run may not exist or be writable in early boot. And if + * no-one has created /var/run/mdadm, we still want to survive. + * So possible locations are: + * /var/run/mdadm/map /var/run/mdadm.map /dev/.mdadm.map + * the last, because udev requires a writable /dev very early. + * We read from the first one that exists and write to the first + * one that we can. */ +#include "mdadm.h" +#define mapnames(base) { #base, #base ".new", #base ".lock"} +char *mapname[3][3] = { + mapnames(/var/run/mdadm/map), + mapnames(/var/run/mdadm.map), + mapnames(/dev/.mdadm.map) +}; -#include "mdadm.h" +int mapmode[3] = { O_RDONLY, O_RDWR|O_CREAT, O_RDWR|O_CREAT | O_TRUNC }; +char *mapsmode[3] = { "r", "w", "w"}; +FILE *open_map(int modenum, int *choice) +{ + int i; + for (i = 0 ; i < 3 ; i++) { + int fd = open(mapname[i][modenum], mapmode[modenum], 0600); + if (fd >= 0) { + *choice = i; + return fdopen(fd, mapsmode[modenum]); + } + } + return NULL; +} int map_write(struct map_ent *mel) { FILE *f; int err; - int subdir = 1; + int which; + + f = open_map(1, &which); - f = fopen("/var/run/mdadm/map.new", "w"); - if (!f) { - f = fopen("/var/run/mdadm.map.new", "w"); - subdir = 0; - } if (!f) return 0; for (; mel; mel = mel->next) { @@ -73,37 +98,25 @@ int map_write(struct map_ent *mel) err = ferror(f); fclose(f); if (err) { - if (subdir) - unlink("/var/run/mdadm/map.new"); - else - unlink("/var/run/mdadm.map.new"); + unlink(mapname[which][1]); return 0; } - if (subdir) - return rename("/var/run/mdadm/map.new", - "/var/run/mdadm/map") == 0; - else - return rename("/var/run/mdadm.map.new", - "/var/run/mdadm.map") == 0; + return rename(mapname[which][1], + mapname[which][0]) == 0; } -static int lfd = -1; -static int lsubdir = 0; +static FILE *lf = NULL; +static int lwhich = 0; int map_lock(struct map_ent **melp) { - if (lfd < 0) { - lfd = open("/var/run/mdadm/map.lock", O_CREAT|O_RDWR, 0600); - if (lfd < 0) { - lfd = open("/var/run/mdadm.map.lock", O_CREAT|O_RDWR, 0600); - lsubdir = 0; - } else - lsubdir = 1; - if (lfd < 0) + if (lf == NULL) { + lf = open_map(2, &lwhich); + if (lf == NULL) return -1; - if (lockf(lfd, F_LOCK, 0) != 0) { - close(lfd); - lfd = -1; + if (lockf(fileno(lf), F_LOCK, 0) != 0) { + fclose(lf); + lf = NULL; return -1; } } @@ -115,13 +128,10 @@ int map_lock(struct map_ent **melp) void map_unlock(struct map_ent **melp) { - if (lfd >= 0) - close(lfd); - if (lsubdir) - unlink("/var/run/mdadm/map.lock"); - else - unlink("/var/run/mdadm.map.lock"); - lfd = -1; + if (lf) + fclose(lf); + unlink(mapname[lwhich][2]); + lf = NULL; } void map_add(struct map_ent **melp, @@ -146,18 +156,15 @@ void map_read(struct map_ent **melp) int devnum, uuid[4]; char metadata[30]; char nam[4]; + int which; *melp = NULL; - f = fopen("/var/run/mdadm/map", "r"); - if (!f) - f = fopen("/var/run/mdadm.map", "r"); + f = open_map(0, &which); if (!f) { RebuildMap(); - f = fopen("/var/run/mdadm/map", "r"); + f = open_map(0, &which); } - if (!f) - f = fopen("/var/run/mdadm.map", "r"); if (!f) return; diff --git a/mdadm.c b/mdadm.c index 1fef4b99..4402eba8 100644 --- a/mdadm.c +++ b/mdadm.c @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) shortopt, long_options, &option_index)) != -1) { int newmode = mode; - /* firstly, some mode-independant options */ + /* firstly, some mode-independent options */ switch(opt) { case 'h': if (option_index > 0 && @@ -1231,10 +1231,17 @@ int main(int argc, char *argv[]) */ struct mdstat_ent *ms = mdstat_read(0, 1); struct mdstat_ent *e; + struct map_ent *map = NULL; int v = verbose>1?0:verbose+1; for (e=ms ; e ; e=e->next) { - char *name = get_md_name(e->devnum); + char *name; + struct map_ent *me; + me = map_by_devnum(&map, e->devnum); + if (me && me->path) + name = me->path; + else + name = get_md_name(e->devnum); if (!name) { fprintf(stderr, Name ": cannot find device file for %s\n", diff --git a/mdadm.h b/mdadm.h index 82c7dedf..251f9a13 100644 --- a/mdadm.h +++ b/mdadm.h @@ -373,10 +373,11 @@ extern int sysfs_uevent(struct mdinfo *sra, char *event); extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, char *name, unsigned long long *val); extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev, - char *name, char *buf, int buf_len); + char *name, char *val, int size); extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms); extern int sysfs_set_array(struct mdinfo *info, int vers); -extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd); +extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, + int in_sync); extern int sysfs_disk_to_scsi_id(int fd, __u32 *id); extern int sysfs_unique_holder(int devnum, long rdev); extern int load_sys(char *path, char *buf); @@ -660,7 +661,7 @@ extern struct supertype *dup_super(struct supertype *st); extern int get_dev_size(int fd, char *dname, unsigned long long *sizep); extern void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk); -void wait_for(char *dev); +void wait_for(char *dev, int fd); #if __GNUC__ < 3 struct stat64; diff --git a/super-ddf.c b/super-ddf.c index 6a870556..fa4d351c 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -643,6 +643,7 @@ static int load_ddf_local(int fd, struct ddf_super *super, struct stat stb; char *conf; int i; + int confsec; int vnum; int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries); unsigned long long dsize; @@ -693,11 +694,11 @@ static int load_ddf_local(int fd, struct ddf_super *super, 0); vnum = 0; - for (i = 0; - i < __be32_to_cpu(super->active->config_section_length); - i += super->conf_rec_len) { + for (confsec = 0; + confsec < __be32_to_cpu(super->active->config_section_length); + confsec += super->conf_rec_len) { struct vd_config *vd = - (struct vd_config *)((char*)conf + i*512); + (struct vd_config *)((char*)conf + confsec*512); struct vcl *vcl; if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) { @@ -781,22 +782,20 @@ static int load_super_ddf(struct supertype *st, int fd, /* 32M is a lower bound */ if (dsize <= 32*1024*1024) { - if (devname) { + if (devname) fprintf(stderr, Name ": %s is too small for ddf: " "size is %llu sectors.\n", devname, dsize>>9); - return 1; - } + return 1; } if (dsize & 511) { - if (devname) { + if (devname) fprintf(stderr, Name ": %s is an odd size for ddf: " "size is %llu bytes.\n", devname, dsize); - return 1; - } + return 1; } if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) { @@ -1062,9 +1061,9 @@ static void examine_vd(int n, struct ddf_super *sb, char *guid) map_num(ddf_sec_level, vc->srl) ?: "-unknown-"); } printf(" Device Size[%d] : %llu\n", n, - __be64_to_cpu(vc->blocks)/2); + (unsigned long long)__be64_to_cpu(vc->blocks)/2); printf(" Array Size[%d] : %llu\n", n, - __be64_to_cpu(vc->array_blocks)/2); + (unsigned long long)__be64_to_cpu(vc->array_blocks)/2); } } @@ -1100,7 +1099,7 @@ static void examine_pds(struct ddf_super *sb) int i; struct dl *dl; printf(" Physical Disks : %d\n", cnt); - printf(" Number RefNo Size Device Type/State\n"); + printf(" Number RefNo Size Device Type/State\n"); for (i=0 ; iphys->entries[i]; @@ -1111,18 +1110,19 @@ static void examine_pds(struct ddf_super *sb) //printf("\n"); printf(" %3d %08x ", i, __be32_to_cpu(pd->refnum)); - printf("%lluK ", __be64_to_cpu(pd->config_size)>>1); + printf("%8lluK ", + (unsigned long long)__be64_to_cpu(pd->config_size)>>1); for (dl = sb->dlist; dl ; dl = dl->next) { if (dl->disk.refnum == pd->refnum) { char *dv = map_dev(dl->major, dl->minor, 0); if (dv) { - printf("%-10s", dv); + printf("%-15s", dv); break; } } } if (!dl) - printf("%10s",""); + printf("%15s",""); printf(" %s%s%s%s%s", (type&2) ? "active":"", (type&4) ? "Global-Spare":"", @@ -2654,6 +2654,8 @@ validate_geometry_ddf_container(struct supertype *st, close(fd); *freesize = avail_size_ddf(st, ldsize >> 9); + if (*freesize == 0) + return 0; return 1; } @@ -2918,6 +2920,8 @@ static struct mdinfo *container_content_ddf(struct supertype *st) if (vc->conf.phys_refnum[i] == 0xFFFFFFFF) continue; + this->array.working_disks++; + for (d = ddf->dlist; d ; d=d->next) if (d->disk.refnum == vc->conf.phys_refnum[i]) break; @@ -2925,8 +2929,6 @@ static struct mdinfo *container_content_ddf(struct supertype *st) /* Haven't found that one yet, maybe there are others */ continue; - this->array.working_disks++; - dev = malloc(sizeof(*dev)); memset(dev, 0, sizeof(*dev)); dev->next = this->devs; diff --git a/sysfs.c b/sysfs.c index 31c92f78..b6156636 100644 --- a/sysfs.c +++ b/sysfs.c @@ -467,23 +467,22 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, } int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev, - char *name, char *buf, int buf_len) + char *name, char *val, int size) { char fname[50]; int n; int fd; - sprintf(fname, "/sys/block/%s/md/%s/%s", sra->sys_name, dev?dev->sys_name:"", name); fd = open(fname, O_RDONLY); if (fd < 0) return -1; - n = read(fd, buf, buf_len); + n = read(fd, val, size); close(fd); if (n <= 0) return -1; - buf[n] = 0; - return 0; + val[n] = 0; + return n; } int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms) @@ -545,7 +544,7 @@ int sysfs_set_array(struct mdinfo *info, int vers) return rv; } -int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd) +int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync) { char dv[100]; char nm[100]; @@ -571,8 +570,12 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd) rv = sysfs_set_num(sra, sd, "offset", sd->data_offset); rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2); if (sra->array.level != LEVEL_CONTAINER) { + if (in_sync) + /* This can correctly fail if array isn't started, + * yet, so just ignore status for now. + */ + sysfs_set_str(sra, sd, "state", "in_sync"); rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk); -// rv |= sysfs_set_str(sra, sd, "state", "in_sync"); } return rv; } diff --git a/test b/test index 1cd341f1..133f8ff4 100644 --- a/test +++ b/test @@ -39,10 +39,13 @@ mdsize1b=19988 mdsize11=19992 mdsize12=19988 +# ddf needs bigger devices as 32Meg is reserved! +ddfsize=65536 + cleanup() { udevadm settle $mdadm -Ssq - for d in 0 1 2 3 4 5 6 7 + for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 do losetup -d /dev/loop$d ; # rm -f $targetdir/mdtest$d done @@ -51,9 +54,11 @@ cleanup() { trap cleanup 0 1 2 3 15 devlist= -for d in 0 1 2 3 4 5 6 7 +for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 do - [ -f $targetdir/mdtest$d ] || dd if=/dev/zero of=$targetdir/mdtest$d count=$size bs=1K > /dev/null 2>&1 + sz=$size + if [ $d -gt 7 ]; then sz=$ddfsize ; fi + [ -f $targetdir/mdtest$d ] || dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1 [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d if [ $d -eq 7 ] then @@ -69,6 +74,8 @@ done path0=$dev6 path1=$dev7 +ulimit -c unlimited +[ -f /proc/mdstat ] || modprobe md_mod echo 2000 > /proc/sys/dev/raid/speed_limit_max echo 0 > /sys/module/md_mod/parameters/start_ro @@ -86,7 +93,9 @@ mdadm() { *-C* ) $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes;; * ) $mdadm 2> $targetdir/stderr --quiet "$@" esac + rv=$? cat >&2 $targetdir/stderr + return $rv } # check various things diff --git a/tests/01r5integ b/tests/01r5integ new file mode 100644 index 00000000..714a57fb --- /dev/null +++ b/tests/01r5integ @@ -0,0 +1,29 @@ + +# Check integrity of raid5 in degraded mode +# Create a 4 disk raid5, create a filesystem and +# sh1sum it with each device failed + +for layout in ls rs la ra +do + mdadm -CR $md0 -l5 --layout $layout -n4 $dev0 $dev1 $dev2 $dev3 + check wait + tar cf - /etc > $md0 + sum=`sha1sum $md0` + + for i in $dev0 $dev1 $dev2 $dev3 + do + mdadm $md0 -f $i + mdadm $md0 -r $i + blockdev --flushbufs $md0 + sum1=`sha1sum $md0` + if [ $sum != $sum1 ] + then + echo $sum does not matc $sum1 with $i missing + exit 1 + fi + mdadm $md0 -a $i + check wait + done + mdadm -S $md0 +done + diff --git a/tests/01raid6integ b/tests/01raid6integ new file mode 100644 index 00000000..ed7cec57 --- /dev/null +++ b/tests/01raid6integ @@ -0,0 +1,53 @@ + +# Check integrity of raid6 in degraded modes +# Create a 5 disk raid6, dump some data to it, then +# sh1sum it with different pairs of devices failed + +layouts='ls rs la ra' +lv=`uname -r` +if expr $lv '>=' 2.6.30 > /dev/null +then + layouts="$layouts parity-first dd-zero-restart ddf-N-restart ddf-N-continue \ + left-asymmetric-6 right-asymmetric-6 left-symmetric-6 right-symmetric-6 parity-first-6" +fi +echo $layouts +for layout in $layouts +do + mdadm -CR $md0 -l6 --layout $layout -n5 $dev0 $dev1 $dev2 $dev3 $dev4 + check wait + tar cf - /etc > $md0 + sum=`sha1sum $md0` + + totest= + for second in $dev0 $dev1 $dev2 $dev3 $dev4 + do + mdadm $md0 -f $second + mdadm $md0 -r $second + blockdev --flushbufs $md0 + sum1=`sha1sum $md0` + if [ $sum != $sum1 ] + then + echo $sum does not matc $sum1 with $second missing + exit 1 + fi + for first in $totest + do + mdadm $md0 -f $first + mdadm $md0 -r $first + blockdev --flushbufs $md0 + sum1=`sha1sum $md0` + if [ $sum != $sum1 ] + then + echo $sum does not matc $sum1 with $first and $second missing + exit 1 + fi + mdadm $md0 -a $first + check wait + done + mdadm $md0 -a $second + check wait + totest="$totest $second" + done + mdadm -S $md0 +done + diff --git a/tests/10ddf-create b/tests/10ddf-create new file mode 100644 index 00000000..db22b64f --- /dev/null +++ b/tests/10ddf-create @@ -0,0 +1,76 @@ +# +# Test basic DDF functionality. +# +# Create a container with 5 drives +# create a small raid0 across them all, then a 2disk raid1 +# and a 3disk raid5 using the remaining space +# +# add some data, tear down the array, reassemble +# and make sure it is still there. + +mdadm -CR /dev/md/ddf0 -e ddf -n 5 $dev8 $dev9 $dev10 $dev11 $dev12 +mdadm -CR r0 -l0 -n5 /dev/md/ddf0 -z 5000 +mdadm -CR r1 -l1 -n2 /dev/md/ddf0 +mdadm -CR r5 -l5 -n3 /dev/md/ddf0 +testdev /dev/md/r0 5 5000 64 +# r0 will use 4992 due to chunk size, so that leave 27776 for the rest +testdev /dev/md/r1 1 27776 1 +testdev /dev/md/r5 2 27776 64 +dd if=/dev/sda of=/dev/md/r0 || true +dd if=/dev/sda of=/dev/md/r1 || true +dd if=/dev/sda of=/dev/md/r5 || true + +s0=`sha1sum /dev/md/r0` +s1=`sha1sum /dev/md/r1` +s5=`sha1sum /dev/md/r5` + + +mdadm -Ss +mdadm -A /dev/md/ddf0 $dev8 $dev9 $dev10 $dev11 $dev12 +mdadm -I /dev/md/ddf0 + +s0a=`sha1sum /dev/md/r0` +s1a=`sha1sum /dev/md/r1` +s5a=`sha1sum /dev/md/r5` + +if [ "$s0" != "$s0a" ]; then + echo r0 did not match ; exit 1; +fi +if [ "$s1" != "$s1a" ]; then + echo r1 did not match ; exit 1; +fi +if [ "$s5" != "$s5a" ]; then + echo r5 did not match ; exit 1; +fi + +# failure status just means it has completed already, so ignore it. +mdadm --wait /dev/md/r1 || true +mdadm --wait /dev/md/r5 || true + +mdadm -Dbs > /var/tmp/mdadm.conf + +mdadm -Ss + +# Now try to assemble using mdadm.conf +mdadm -Asc /var/tmp/mdadm.conf +check nosync # This failed once. The raid5 was resyncing. + +mdadm -Dbs > /tmp/mdadm.conf +diff /tmp/mdadm.conf /var/tmp/mdadm.conf +mdadm -Ss + +# and now assemble fully incrementally. +for i in $dev8 $dev9 $dev10 $dev11 $dev12 +do + #./mdadm -I $i -vv 2>&1 | wc -l > /tmp/cnt + ./mdadm -I $i 2> /tmp/thing + wc -l < /tmp/thing > /tmp/cnt + # should find container and 2 devices, so 3 lines. + [ `cat /tmp/cnt` -eq 3 ] +done +check nosync + +mdadm -Dbs > /tmp/mdadm.conf +diff /tmp/mdadm.conf /var/tmp/mdadm.conf +mdadm -Ss +rm /tmp/mdadm.conf /var/tmp/mdadm.conf diff --git a/tests/env-09imsm-create-fail-rebuild b/tests/env-09imsm-create-fail-rebuild index 2fc90edc..b44746c5 100644 --- a/tests/env-09imsm-create-fail-rebuild +++ b/tests/env-09imsm-create-fail-rebuild @@ -1,5 +1,5 @@ imsm_check_hold() { - if [ mdadm --remove $1 $2 -eq 0 ]; then + if mdadm --remove $1 $2; then echo "$2 removal from $1 should have been blocked" >&2 cat /proc/mdstat >&2 mdadm -E $2 @@ -8,7 +8,7 @@ imsm_check_hold() { } imsm_check_removal() { - if [ mdadm --remove $1 $2 -ne 0 ]; then + if ! mdadm --remove $1 $2 ; then echo "$2 removal from $1 should have succeeded" >&2 cat /proc/mdstat >&2 mdadm -E $2 @@ -17,6 +17,7 @@ imsm_check_removal() { } imsm_check() { + udevadm settle case $1 in container ) grep -s "$(((418 * $2)/2)) blocks super external:imsm" /proc/mdstat > /dev/null || { diff --git a/udev-md-raid.rules b/udev-md-raid.rules index 1b0da389..eec16b73 100644 --- a/udev-md-raid.rules +++ b/udev-md-raid.rules @@ -2,12 +2,18 @@ SUBSYSTEM!="block", GOTO="md_end" ACTION!="add|change", GOTO="md_end" +ACTION=="change", GOTO="md_no_incr" # import data from a raid member and activate it #ENV{ID_FS_TYPE}=="linux_raid_member", IMPORT{program}="/sbin/mdadm --examine --export $tempnode", RUN+="/sbin/mdadm --incremental $env{DEVNAME}" # import data from a raid set +LABEL="md_no_incr" KERNEL!="md*", GOTO="md_end" +# partitions have no md/{array_state,metadata_version}, but should not +# for that reason be ignored. +ENV{DEVTYPE}=="partition", GOTO="md_ignore_state" + # container devices have a metadata version of e.g. 'external:ddf' and # never leave state 'inactive' ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state" diff --git a/util.c b/util.c index e613a0c8..35bb91bb 100644 --- a/util.c +++ b/util.c @@ -765,7 +765,7 @@ int find_free_devnum(int use_partitions) { int devnum; for (devnum = 127; devnum != 128; - devnum = devnum ? devnum-1 : (1<<22)-1) { + devnum = devnum ? devnum-1 : (1<<20)-1) { char *dn; int _devnum; @@ -852,13 +852,20 @@ int same_dev(char *one, char *two) return st1.st_rdev == st2.st_rdev; } -void wait_for(char *dev) +void wait_for(char *dev, int fd) { int i; + struct stat stb_want; + + if (fstat(fd, &stb_want) != 0 || + (stb_want.st_mode & S_IFMT) != S_IFBLK) + return; for (i=0 ; i<25 ; i++) { struct stat stb; - if (stat(dev, &stb) == 0) + if (stat(dev, &stb) == 0 && + (stb.st_mode & S_IFMT) == S_IFBLK && + (stb.st_rdev == stb_want.st_rdev)) return; usleep(200000); } @@ -1085,7 +1092,8 @@ int add_disk(int mdfd, struct supertype *st, int rv; #ifndef MDASSEMBLE if (st->ss->external) { - rv = sysfs_add_disk(sra, info); + rv = sysfs_add_disk(sra, info, + info->disk.state & (1<devs; sd2; sd2=sd2->next)