X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Assemble.c;h=a73e101ed047e75c139ab48b84a70f0c15270df2;hp=794b00d43421909cc6f91582aa5c4fc777abd2ed;hb=ff54de6e47163944185f231700e72d3122b58f4c;hpb=feb716e9c3568a45b8815bf2c59e417d30635f89 diff --git a/Assemble.c b/Assemble.c index 794b00d4..a73e101e 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2002 Neil Brown + * Copyright (C) 2001-2006 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -28,14 +28,33 @@ */ #include "mdadm.h" -#include "md_u.h" -#include "md_p.h" +#include -int Assemble(char *mddev, int mdfd, - mddev_ident_t ident, char *conffile, - mddev_dev_t devlist, +static int name_matches(char *found, char *required, char *homehost) +{ + /* See if the name found matches the required name, possibly + * prefixed with 'homehost' + */ + char fnd[33]; + + strncpy(fnd, found, 32); + fnd[32] = 0; + if (strcmp(found, required)==0) + return 1; + if (homehost) { + int l = strlen(homehost); + if (l < 32 && fnd[l] == ':' && + strcmp(fnd+l+1, required)==0) + return 1; + } + return 0; +} + +int Assemble(struct supertype *st, char *mddev, int mdfd, + mddev_ident_t ident, + mddev_dev_t devlist, char *backup_file, int readonly, int runstop, - char *update, + char *update, char *homehost, int verbose, int force) { /* @@ -52,11 +71,11 @@ int Assemble(char *mddev, int mdfd, * Much of the work of Assemble is in finding and/or * checking the disks to make sure they look right. * - * If mddev is not set, then scan must be and we + * If mddev is not set, then scan must be set and we * read through the config file for dev+uuid mapping * We recurse, setting mddev, for each device that * - isn't running - * - has a valid uuid (or any uuid if !uuidset + * - has a valid uuid (or any uuid if !uuidset) * * If mddev is set, we try to determine state of md. * check version - must be at least 0.90.0 @@ -68,15 +87,15 @@ int Assemble(char *mddev, int mdfd, * * If !uuidset and scan, look in conf-file for uuid * If not found, give up - * If !devlist and scan and uuidset, get list of devs from conf-file + * If !devlist and scan and uuidset, get list of devs from conf-file * * For each device: * Check superblock - discard if bad * Check uuid (set if we don't have one) - discard if no match * Check superblock similarity if we have a superblock - discard if different - * Record events, devicenum, utime + * Record events, devicenum * This should give us a list of devices for the array - * We should collect the most recent event and utime numbers + * We should collect the most recent event number * * Count disks with recent enough event count * While force && !enough disks @@ -92,53 +111,60 @@ int Assemble(char *mddev, int mdfd, * START_ARRAY * */ + int clean = 0; + int must_close = 0; int old_linux = 0; - int vers; - mdu_array_info_t array; - mdp_super_t first_super, super; + int vers = 0; /* Keep gcc quite - it really is initialised */ struct { char *devname; - int major, minor; - int oldmajor, oldminor; - long long events; - time_t utime; - int uptodate; - int state; - int raid_disk; + int uptodate; /* set once we decide that this device is as + * recent as everything else in the array. + */ + struct mdinfo i; } *devices; int *best = NULL; /* indexed by raid_disk */ - int bestcnt = 0; - int devcnt = 0, okcnt, sparecnt; - int req_cnt; - int i; + unsigned int bestcnt = 0; + int devcnt = 0; + unsigned int okcnt, sparecnt; + unsigned int req_cnt; + unsigned int i; int most_recent = 0; int chosen_drive; int change = 0; int inargv = 0; - int start_partial_ok = force || devlist==NULL; - int num_devs; + int bitmap_done; + int start_partial_ok = (runstop >= 0) && (force || devlist==NULL || mdfd < 0); + unsigned int num_devs; mddev_dev_t tmpdev; - - vers = md_get_version(mdfd); - if (vers <= 0) { - fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev); - return 1; - } - if (vers < 9000) { - fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" - " Upgrade your kernel or try --build\n"); - return 1; - } + struct mdinfo info; + char *avail; + int nextspare = 0; + int uuid_for_name = 0; + + memset(&info, 0, sizeof(info)); + if (get_linux_version() < 2004000) old_linux = 1; - if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) { - fprintf(stderr, Name ": device %s already active - cannot assemble it\n", - mddev); - return 1; - } - ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ + if (mdfd >= 0) { + vers = md_get_version(mdfd); + if (vers <= 0) { + fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev); + return 1; + } + if (vers < 9000) { + fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" + " Upgrade your kernel or try --build\n"); + return 1; + } + if (ioctl(mdfd, GET_ARRAY_INFO, &info.array)>=0) { + fprintf(stderr, Name ": device %s already active - cannot assemble it\n", + mddev); + return 1; + } + ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ + } /* * If any subdevs are listed, then any that don't * match ident are discarded. Remainder must all match and @@ -152,213 +178,378 @@ int Assemble(char *mddev, int mdfd, ident->super_minor < 0 && ident->devices == NULL) { fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n", - mddev); + mddev ? mddev : "further assembly"); return 1; } if (devlist == NULL) - devlist = conf_get_devs(conffile); - else inargv = 1; + devlist = conf_get_devs(); + else if (mdfd >= 0) + inargv = 1; + + try_again: tmpdev = devlist; num_devs = 0; while (tmpdev) { - num_devs++; + if (tmpdev->used) + tmpdev->used = 2; + else + num_devs++; tmpdev = tmpdev->next; } - best = malloc(num_devs * sizeof(*best)); devices = malloc(num_devs * sizeof(*devices)); - first_super.md_magic = 0; - for (i=0; ist) st = ident->st; - if (verbose) + if (verbose>0) fprintf(stderr, Name ": looking for devices for %s\n", - mddev); + mddev ? mddev : "further assembly"); - while ( devlist) { - char *devname; - int this_uuid[4]; + /* first walk the list of devices to find a consistent set + * that match the criterea, if that is possible. + * We flag the one we like with 'used'. + */ + for (tmpdev = devlist; + tmpdev; + tmpdev = tmpdev->next) { + char *devname = tmpdev->devname; int dfd; struct stat stb; - int havesuper=0; + struct supertype *tst = dup_super(st); - devname = devlist->devname; - devlist = devlist->next; + if (tmpdev->used > 1) continue; if (ident->devices && !match_oneof(ident->devices, devname)) { - if (inargv || verbose) + if ((inargv && verbose>=0) || verbose > 0) fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices); continue; } - - dfd = open(devname, O_RDONLY, 0); + + dfd = dev_open(devname, O_RDONLY|O_EXCL); if (dfd < 0) { - if (inargv || verbose) + if ((inargv && verbose >= 0) || verbose > 0) fprintf(stderr, Name ": cannot open device %s: %s\n", devname, strerror(errno)); + tmpdev->used = 2; } else if (fstat(dfd, &stb)< 0) { /* Impossible! */ fprintf(stderr, Name ": fstat failed for %s: %s\n", devname, strerror(errno)); - close(dfd); + tmpdev->used = 2; } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { fprintf(stderr, Name ": %s is not a block device.\n", devname); - close(dfd); - } else if (load_super(dfd, &super)) { - if (inargv || verbose) + tmpdev->used = 2; + } else if (!tst && (tst = guess_super(dfd)) == NULL) { + if ((inargv && verbose >= 0) || verbose > 0) + fprintf(stderr, Name ": no recogniseable superblock on %s\n", + devname); + tmpdev->used = 2; + } else if (tst->ss->load_super(tst,dfd, NULL)) { + if ((inargv && verbose >= 0) || verbose > 0) fprintf( stderr, Name ": no RAID superblock on %s\n", devname); - close(dfd); } else { - havesuper =1; - uuid_from_super(this_uuid, &super); - close(dfd); + tst->ss->getinfo_super(tst, &info); } + if (dfd >= 0) close(dfd); - if (ident->uuid_set && - (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) { - if (inargv || verbose) + if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) && + (!tst || !tst->sb || + same_uuid(info.uuid, ident->uuid, tst->ss->swapuuid)==0)) { + if ((inargv && verbose >= 0) || verbose > 0) fprintf(stderr, Name ": %s has wrong uuid.\n", devname); - continue; + goto loop; + } + if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && + (!tst || !tst->sb || + name_matches(info.name, ident->name, homehost)==0)) { + if ((inargv && verbose >= 0) || verbose > 0) + fprintf(stderr, Name ": %s has wrong name.\n", + devname); + goto loop; } - if (ident->super_minor >= 0 && - (!havesuper || ident->super_minor != super.md_minor)) { - if (inargv || verbose) + if (ident->super_minor != UnSet && + (!tst || !tst->sb || + ident->super_minor != info.array.md_minor)) { + if ((inargv && verbose >= 0) || verbose > 0) fprintf(stderr, Name ": %s has wrong super-minor.\n", devname); - continue; + goto loop; } - if (ident->level != -10 && - (!havesuper|| ident->level != super.level)) { - if (inargv || verbose) + if (ident->level != UnSet && + (!tst || !tst->sb || + ident->level != info.array.level)) { + if ((inargv && verbose >= 0) || verbose > 0) fprintf(stderr, Name ": %s has wrong raid level.\n", devname); - continue; + goto loop; } - if (ident->raid_disks != -1 && - (!havesuper || ident->raid_disks!= super.raid_disks)) { - if (inargv || verbose) + if (ident->raid_disks != UnSet && + (!tst || !tst->sb || + ident->raid_disks!= info.array.raid_disks)) { + if ((inargv && verbose >= 0) || verbose > 0) fprintf(stderr, Name ": %s requires wrong number of drives.\n", devname); - continue; + goto loop; } - - /* If we are this far, then we are commited to this device. + if (mdfd < 0) { + if (tst == NULL || tst->sb == NULL) + continue; + switch(tst->ss->match_home(tst, homehost)) + { + case 1: /* happy with match. */ + break; + case -1: /* cannot match */ + uuid_for_name = 1; + break; + case 0: /* Doesn't match */ + if (update) + /* We are changing the name*/ + break; + if ((inargv && verbose >= 0) || verbose > 0) + fprintf(stderr, Name ": %s is not built for " + "host %s - using UUID for " + "device name.\n", + devname, homehost); + + /* Auto-assemble, and this is not a usable host */ + /* if update != NULL, we are updating the host + * name... */ + uuid_for_name = 1; + break; + } + } + /* If we are this far, then we are nearly commited to this device. * If the super_block doesn't exist, or doesn't match others, - * then we cannot continue + * then we probably cannot continue + * However if one of the arrays is for the homehost, and + * the other isn't that can disambiguate. */ - if (!havesuper) { + if (!tst || !tst->sb) { fprintf(stderr, Name ": %s has no superblock - assembly aborted\n", devname); + if (st) + st->ss->free_super(st); return 1; } - if (compare_super(&first_super, &super)) { + + if (st == NULL) + st = dup_super(tst); + if (st->minor_version == -1) + st->minor_version = tst->minor_version; + if (st->ss != tst->ss || + st->minor_version != tst->minor_version || + st->ss->compare_super(st, tst) != 0) { + /* Some mismatch. If exactly one array matches this host, + * we can resolve on that one. + * Or, if we are auto assembling, we just ignore the second + * for now. + */ + if (mdfd < 0) + goto loop; + if (homehost) { + int first = st->ss->match_home(st, homehost); + int last = tst->ss->match_home(tst, homehost); + if (first != last && + (first == 1 || last == 1)) { + /* We can do something */ + if (first) {/* just ignore this one */ + if ((inargv && verbose >= 0) || verbose > 0) + fprintf(stderr, Name ": %s misses out due to wrong homehost\n", + devname); + goto loop; + } else { /* reject all those sofar */ + mddev_dev_t td; + if ((inargv && verbose >= 0) || verbose > 0) + fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n", + devname); + for (td=devlist; td != tmpdev; td=td->next) + if (td->used == 1) + td->used = 0; + tmpdev->used = 1; + goto loop; + } + } + } fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n", devname); + tst->ss->free_super(tst); + st->ss->free_super(st); return 1; } + tmpdev->used = 1; - /* this is needed until we get a more relaxed super block format */ - if (devcnt >= MD_SB_DISKS) { - fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n", - devname); - continue; + loop: + if (tst) + tst->ss->free_super(tst); + } + + if (mdfd < 0) { + /* So... it is up to me to open the device. + * We create a name '/dev/md/XXX' based on the info in the + * superblock, and call open_mddev on that + */ + mdu_array_info_t inf; + char *c; + char nbuf[64]; + if (!st || !st->sb) { + return 2; + } + st->ss->getinfo_super(st, &info); + if (uuid_for_name) + c = fname_from_uuid(st, &info, nbuf, '-'); + else { + c = strchr(info.name, ':'); + if (c) c++; else c= info.name; + } + if (isdigit(*c) && ((ident->autof & 7)==4 || (ident->autof&7)==6)) + /* /dev/md/d0 style for partitionable */ + asprintf(&mddev, "/dev/md/d%s", c); + else + asprintf(&mddev, "/dev/md/%s", c); + mdfd = open_mddev(mddev, ident->autof); + if (mdfd < 0) { + st->ss->free_super(st); + free(devices); + goto try_again; + } + vers = md_get_version(mdfd); + if (ioctl(mdfd, GET_ARRAY_INFO, &inf)==0) { + for (tmpdev = devlist ; + tmpdev && tmpdev->used != 1; + tmpdev = tmpdev->next) + ; + fprintf(stderr, Name ": %s already active, cannot restart it!\n", mddev); + if (tmpdev) + fprintf(stderr, Name ": %s needed for %s...\n", + mddev, tmpdev->devname); + close(mdfd); + mdfd = -1; + st->ss->free_super(st); + free(devices); + goto try_again; } - + must_close = 1; + } + + /* Ok, no bad inconsistancy, we can try updating etc */ + bitmap_done = 0; + for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) { + char *devname = tmpdev->devname; + struct stat stb; /* looks like a good enough match to update the super block if needed */ +#ifndef MDASSEMBLE if (update) { - if (strcmp(update, "sparc2.2")==0 ) { - /* 2.2 sparc put the events in the wrong place - * So we copy the tail of the superblock - * up 4 bytes before continuing - */ - __u32 *sb32 = (__u32*)&super; - memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7, - sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1, - (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4); - fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n", - devname); - } - if (strcmp(update, "super-minor") ==0) { - struct stat stb2; - fstat(mdfd, &stb2); - super.md_minor = MINOR(stb2.st_rdev); - if (verbose) - fprintf(stderr, Name ": updating superblock of %s with minor number %d\n", - devname, super.md_minor); + int dfd; + /* prepare useful information in info structures */ + struct stat stb2; + struct supertype *tst; + fstat(mdfd, &stb2); + + if (strcmp(update, "uuid")==0 && + !ident->uuid_set) { + int rfd; + if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || + read(rfd, ident->uuid, 16) != 16) { + *(__u32*)(ident->uuid) = random(); + *(__u32*)(ident->uuid+1) = random(); + *(__u32*)(ident->uuid+2) = random(); + *(__u32*)(ident->uuid+3) = random(); + } + if (rfd >= 0) close(rfd); } - if (strcmp(update, "summaries") == 0) { - /* set nr_disks, active_disks, working_disks, - * failed_disks, spare_disks based on disks[] - * array in superblock. - * Also make sure extra slots aren't 'failed' - */ - super.nr_disks = super.active_disks = - super.working_disks = super.failed_disks = - super.spare_disks = 0; - for (i=0; i < MD_SB_DISKS ; i++) - if (super.disks[i].major || - super.disks[i].minor) { - int state = super.disks[i].state; - if (state & (1<= super.raid_disks && super.disks[i].number == 0) - super.disks[i].state = 0; + dfd = dev_open(devname, O_RDWR|O_EXCL); + + remove_partitions(dfd); + + tst = dup_super(st); + tst->ss->load_super(tst, dfd, NULL); + tst->ss->getinfo_super(tst, &info); + + memcpy(info.uuid, ident->uuid, 16); + strcpy(info.name, ident->name); + info.array.md_minor = minor(stb2.st_rdev); + + tst->ss->update_super(tst, &info, update, + devname, verbose, + ident->uuid_set, homehost); + if (strcmp(update, "uuid")==0 && + !ident->uuid_set) { + ident->uuid_set = 1; + memcpy(ident->uuid, info.uuid, 16); } - super.sb_csum = calc_sb_csum(&super); - dfd = open(devname, O_RDWR, 0); - if (dfd < 0) + if (dfd < 0) fprintf(stderr, Name ": Cannot open %s for superblock update\n", devname); - else if (store_super(dfd, &super)) + else if (tst->ss->store_super(tst, dfd)) fprintf(stderr, Name ": Could not re-write superblock on %s.\n", devname); if (dfd >= 0) close(dfd); + + if (strcmp(update, "uuid")==0 && + ident->bitmap_fd >= 0 && !bitmap_done) { + if (bitmap_update_uuid(ident->bitmap_fd, + info.uuid, + tst->ss->swapuuid) != 0) + fprintf(stderr, Name ": Could not update uuid on external bitmap.\n"); + else + bitmap_done = 1; + } + tst->ss->free_super(tst); + } else +#endif + { + struct supertype *tst = dup_super(st); + int dfd; + dfd = dev_open(devname, O_RDWR|O_EXCL); + + remove_partitions(dfd); + + tst->ss->load_super(tst, dfd, NULL); + tst->ss->getinfo_super(tst, &info); + tst->ss->free_super(tst); + close(dfd); } - if (verbose) + stat(devname, &stb); + + if (verbose > 0) fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n", - devname, mddev, super.this_disk.raid_disk); + devname, mddev, info.disk.raid_disk); devices[devcnt].devname = devname; - devices[devcnt].major = MAJOR(stb.st_rdev); - devices[devcnt].minor = MINOR(stb.st_rdev); - devices[devcnt].oldmajor = super.this_disk.major; - devices[devcnt].oldminor = super.this_disk.minor; - devices[devcnt].events = md_event(&super); - devices[devcnt].utime = super.utime; - devices[devcnt].raid_disk = super.this_disk.raid_disk; devices[devcnt].uptodate = 0; - devices[devcnt].state = super.this_disk.state; + devices[devcnt].i = info; + devices[devcnt].i.disk.major = major(stb.st_rdev); + devices[devcnt].i.disk.minor = minor(stb.st_rdev); if (most_recent < devcnt) { - if (devices[devcnt].events - > devices[most_recent].events) + if (devices[devcnt].i.events + > devices[most_recent].i.events) most_recent = devcnt; } - if (super.level == -4) + if (info.array.level == -4) /* with multipath, the raid_disk from the superblock is meaningless */ i = devcnt; else - i = devices[devcnt].raid_disk; - if (i>=0 && i < 10000) { + i = devices[devcnt].i.disk.raid_disk; + if (i+1 == 0) { + if (nextspare < info.array.raid_disks) + nextspare = info.array.raid_disks; + i = nextspare++; + } else { + if (i >= info.array.raid_disks && + i >= nextspare) + nextspare = i+1; + } + if (i < 10000) { if (i >= bestcnt) { - int newbestcnt = i+10; + unsigned int newbestcnt = i+10; int *newbest = malloc(sizeof(int)*newbestcnt); - int c; + unsigned int c; for (c=0; c < newbestcnt; c++) if (c < bestcnt) newbest[c] = best[c]; @@ -368,8 +559,33 @@ int Assemble(char *mddev, int mdfd, best = newbest; bestcnt = newbestcnt; } + if (best[i] >=0 && + devices[best[i]].i.events + == devices[devcnt].i.events + && (devices[best[i]].i.disk.minor + != devices[devcnt].i.disk.minor) + && st->ss == &super0 + && info.array.level != LEVEL_MULTIPATH) { + /* two different devices with identical superblock. + * Could be a mis-detection caused by overlapping + * partitions. fail-safe. + */ + fprintf(stderr, Name ": WARNING %s and %s appear" + " to have very similar superblocks.\n" + " If they are really different, " + "please --zero the superblock on one\n" + " If they are the same or overlap," + " please remove one from %s.\n", + devices[best[i]].devname, devname, + inargv ? "the list" : + "the\n DEVICE list in mdadm.conf" + ); + if (must_close) close(mdfd); + return 1; + } if (best[i] == -1 - || devices[best[i]].events < devices[devcnt].events) + || (devices[best[i]].i.events + < devices[devcnt].i.events)) best[i] = devcnt; } devcnt++; @@ -378,89 +594,130 @@ int Assemble(char *mddev, int mdfd, if (devcnt == 0) { fprintf(stderr, Name ": no devices found for %s\n", mddev); + if (st) + st->ss->free_super(st); + if (must_close) close(mdfd); return 1; } + + if (update && strcmp(update, "byteorder")==0) + st->minor_version = 90; + + st->ss->getinfo_super(st, &info); + clean = info.array.state & 1; + /* now we have some devices that might be suitable. * I wonder how many */ + avail = malloc(info.array.raid_disks); + memset(avail, 0, info.array.raid_disks); okcnt = 0; sparecnt=0; for (i=0; i< bestcnt ;i++) { int j = best[i]; - int event_margin = !force; + int event_margin = 1; /* always allow a difference of '1' + * like the kernel does + */ if (j < 0) continue; /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (first_super.level != -4) - if (!(devices[j].state & (1<= - devices[most_recent].events) { + if (devices[j].i.events+event_margin >= + devices[most_recent].i.events) { devices[j].uptodate = 1; - if (i < first_super.raid_disks) + if (i < info.array.raid_disks) { okcnt++; - else + avail[i]=1; + } else sparecnt++; } } - while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) { + while (force && !enough(info.array.level, info.array.raid_disks, + info.array.layout, 1, + avail, okcnt)) { /* Choose the newest best drive which is * not up-to-date, update the superblock * and add it. */ int fd; + struct supertype *tst; + long long current_events; chosen_drive = -1; - for (i=0; i=0 && !devices[j].uptodate && - devices[j].events > 0 && + devices[j].i.events > 0 && (chosen_drive < 0 || - devices[j].events > devices[chosen_drive].events)) + devices[j].i.events + > devices[chosen_drive].i.events)) chosen_drive = j; } if (chosen_drive < 0) break; - fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", - devices[chosen_drive].devname, devices[chosen_drive].raid_disk, - (int)(devices[chosen_drive].events), - (int)(devices[most_recent].events)); - fd = open(devices[chosen_drive].devname, O_RDWR); + current_events = devices[chosen_drive].i.events; + add_another: + if (verbose >= 0) + fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", + devices[chosen_drive].devname, + devices[chosen_drive].i.disk.raid_disk, + (int)(devices[chosen_drive].i.events), + (int)(devices[most_recent].i.events)); + fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL); if (fd < 0) { fprintf(stderr, Name ": Couldn't open %s for write - not updating\n", devices[chosen_drive].devname); - devices[chosen_drive].events = 0; + devices[chosen_drive].i.events = 0; continue; } - if (load_super(fd, &super)) { + tst = dup_super(st); + if (tst->ss->load_super(tst,fd, NULL)) { close(fd); fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n", devices[chosen_drive].devname); - devices[chosen_drive].events = 0; + devices[chosen_drive].i.events = 0; continue; } - super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF; - super.events_lo = (devices[most_recent].events)&0xFFFFFFFF; - if (super.level == 5 || super.level == 4) { - /* need to force clean */ - super.state = (1<ss->update_super(tst, &info, "force-one", + devices[chosen_drive].devname, verbose, + 0, NULL); + + if (tst->ss->store_super(tst, fd)) { close(fd); fprintf(stderr, Name ": Could not re-write superblock on %s\n", devices[chosen_drive].devname); - devices[chosen_drive].events = 0; + devices[chosen_drive].i.events = 0; + tst->ss->free_super(tst); continue; } close(fd); - devices[chosen_drive].events = devices[most_recent].events; + devices[chosen_drive].i.events = devices[most_recent].i.events; devices[chosen_drive].uptodate = 1; + avail[chosen_drive] = 1; okcnt++; + tst->ss->free_super(tst); + + /* If there are any other drives of the same vintage, + * add them in as well. We can't lose and we might gain + */ + for (i=0; i= 0 && + !devices[j].uptodate && + devices[j].i.events > 0 && + devices[j].i.events == current_events) { + chosen_drive = j; + goto add_another; + } + } } /* Now we want to look at the superblock which the kernel will base things on @@ -470,33 +727,45 @@ int Assemble(char *mddev, int mdfd, * superblock. */ chosen_drive = -1; + st->ss->free_super(st); for (i=0; chosen_drive < 0 && iss->load_super(st,fd, NULL)) { close(fd); fprintf(stderr, Name ": RAID superblock has disappeared from %s\n", devices[j].devname); + if (must_close) close(mdfd); return 1; } close(fd); } - + if (st->sb == NULL) { + fprintf(stderr, Name ": No suitable drives found for %s\n", mddev); + if (must_close) close(mdfd); + return 1; + } + st->ss->getinfo_super(st, &info); +#ifndef MDASSEMBLE + sysfs_init(&info, mdfd, 0); +#endif for (i=0; iss->update_super(st, &devices[j].i, "assemble", NULL, + verbose, 0, NULL)) { if (force) { - fprintf(stderr, Name ": " - "clearing FAULTY flag for device %d in %s for %s\n", - j, mddev, devices[j].devname); - super.disks[i].state = desired_state; - change |= 2; + if (verbose >= 0) + fprintf(stderr, Name ": " + "clearing FAULTY flag for device %d in %s for %s\n", + j, mddev, devices[j].devname); + change = 1; } else { - fprintf(stderr, Name ": " - "device %d in %s has wrong state in superblock, but %s seems ok\n", - i, mddev, devices[j].devname); + if (verbose >= -1) + fprintf(stderr, Name ": " + "device %d in %s has wrong state in superblock, but %s seems ok\n", + i, mddev, devices[j].devname); } } - if (!devices[j].uptodate && - !(super.disks[i].state & (1 << MD_DISK_FAULTY))) { +#if 0 + if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) { fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", i, mddev); } +#endif } - if (force && (super.level == 4 || super.level == 5) && - okcnt == super.raid_disks-1) { - super.state = (1<< MD_SB_CLEAN); - change |= 2; + if (force && !clean && + !enough(info.array.level, info.array.raid_disks, + info.array.layout, clean, + avail, okcnt)) { + change += st->ss->update_super(st, &info, "force-array", + devices[chosen_drive].devname, verbose, + 0, NULL); + clean = 1; } - if ((force && (change & 2)) - || (old_linux && (change & 1))) { + if (change) { int fd; - super.sb_csum = calc_sb_csum(&super); - fd = open(devices[chosen_drive].devname, O_RDWR); + fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL); if (fd < 0) { - fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n", + fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", devices[chosen_drive].devname); + if (must_close) close(mdfd); return 1; } - if (store_super(fd, &super)) { + if (st->ss->store_super(st, fd)) { close(fd); fprintf(stderr, Name ": Could not re-write superblock on %s\n", devices[chosen_drive].devname); + if (must_close) close(mdfd); return 1; } close(fd); - change = 0; } + /* If we are in the middle of a reshape we may need to restore saved data + * that was moved aside due to the reshape overwriting live data + * The code of doing this lives in Grow.c + */ +#ifndef MDASSEMBLE + if (info.reshape_active) { + int err = 0; + int *fdlist = malloc(sizeof(int)* bestcnt); + for (i=0; i= 0) { + fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL); + if (fdlist[i] < 0) { + fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", + devices[j].devname); + err = 1; + break; + } + } else + fdlist[i] = -1; + } + if (!err) + err = Grow_restart(st, &info, fdlist, bestcnt, backup_file); + while (i>0) { + i--; + if (fdlist[i]>=0) close(fdlist[i]); + } + if (err) { + fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n"); + if (must_close) close(mdfd); + return err; + } + } +#endif /* count number of in-sync devices according to the superblock. * We must have this number to start the array without -s or -R */ - req_cnt = 0; - for (i=0; ibitmap_fd >= 0) { + if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) { + fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n"); + if (must_close) close(mdfd); + return 1; + } + } else if (ident->bitmap_file) { + /* From config file */ + int bmfd = open(ident->bitmap_file, O_RDWR); + if (bmfd < 0) { + fprintf(stderr, Name ": Could not open bitmap file %s\n", + ident->bitmap_file); + if (must_close) close(mdfd); + return 1; + } + if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) { + fprintf(stderr, Name ": Failed to set bitmapfile for %s\n", mddev); + close(bmfd); + if (must_close) close(mdfd); + return 1; + } + close(bmfd); + } + /* First, add the raid disks, but add the chosen one last */ for (i=0; i<= bestcnt; i++) { int j; @@ -595,62 +913,145 @@ This doesnt work yet j = chosen_drive; if (j >= 0 /* && devices[j].uptodate */) { - mdu_disk_info_t disk; - memset(&disk, 0, sizeof(disk)); - disk.major = devices[j].major; - disk.minor = devices[j].minor; - if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) { - fprintf(stderr, Name ": failed to add %s to %s: %s\n", + rv = add_disk(mdfd, st, &info, &devices[j].i); + + if (rv) { + fprintf(stderr, Name ": failed to add " + "%s to %s: %s\n", devices[j].devname, mddev, strerror(errno)); - if (i < first_super.raid_disks) + if (i < info.array.raid_disks + || i == bestcnt) okcnt--; else sparecnt--; - } else if (verbose) - fprintf(stderr, Name ": added %s to %s as %d\n", - devices[j].devname, mddev, devices[j].raid_disk); - } else if (verbose && i < first_super.raid_disks) - fprintf(stderr, Name ": no uptodate device for slot %d of %s\n", + } else if (verbose > 0) + fprintf(stderr, Name ": added %s " + "to %s as %d\n", + devices[j].devname, mddev, + devices[j].i.disk.raid_disk); + } else if (verbose > 0 && i < info.array.raid_disks) + fprintf(stderr, Name ": no uptodate device for " + "slot %d of %s\n", i, mddev); } - + + if (info.array.level == LEVEL_CONTAINER) { + if (verbose >= 0) { + fprintf(stderr, Name ": Container %s has been " + "assembled with %d drive%s", + mddev, okcnt, okcnt==1?"":"s"); + if (okcnt < info.array.raid_disks) + fprintf(stderr, " (out of %d)", + info.array.raid_disks); + fprintf(stderr, "\n"); + } + if (must_close) + close(mdfd); + return 0; + } + if (runstop == 1 || - (runstop == 0 && - ( enough(first_super.level, first_super.raid_disks, okcnt) && + (runstop <= 0 && + ( enough(info.array.level, info.array.raid_disks, + info.array.layout, clean, avail, okcnt) && (okcnt >= req_cnt || start_partial_ok) ))) { if (ioctl(mdfd, RUN_ARRAY, NULL)==0) { - fprintf(stderr, Name ": %s has been started with %d drive%s", - mddev, okcnt, okcnt==1?"":"s"); - if (okcnt < first_super.raid_disks) - fprintf(stderr, " (out of %d)", first_super.raid_disks); - if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); - fprintf(stderr, ".\n"); + if (verbose >= 0) { + fprintf(stderr, Name ": %s has been started with %d drive%s", + mddev, okcnt, okcnt==1?"":"s"); + if (okcnt < info.array.raid_disks) + fprintf(stderr, " (out of %d)", info.array.raid_disks); + if (sparecnt) + fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, ".\n"); + } + if (must_close) { + int usecs = 1; + close(mdfd); + /* There is a nasty race with 'mdadm --monitor'. + * If it opens this device before we close it, + * it gets an incomplete open on which IO + * doesn't work and the capacity is + * wrong. + * If we reopen (to check for layered devices) + * before --monitor closes, we loose. + * + * So: wait upto 1 second for there to be + * a non-zero capacity. + */ + while (usecs < 1000) { + mdfd = open(mddev, O_RDONLY); + if (mdfd >= 0) { + unsigned long long size; + if (get_dev_size(mdfd, NULL, &size) && + size > 0) + break; + close(mdfd); + } + usleep(usecs); + usecs <<= 1; + } + } return 0; } fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno)); + + if (!enough(info.array.level, info.array.raid_disks, + info.array.layout, 1, avail, okcnt)) + fprintf(stderr, Name ": Not enough devices to " + "start the array.\n"); + else if (!enough(info.array.level, + info.array.raid_disks, + info.array.layout, clean, + avail, okcnt)) + fprintf(stderr, Name ": Not enough devices to " + "start the array while not clean " + "- consider --force.\n"); + + if (must_close) { + ioctl(mdfd, STOP_ARRAY, NULL); + close(mdfd); + } return 1; } if (runstop == -1) { - fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n", + fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); + if (okcnt != info.array.raid_disks) + fprintf(stderr, " (out of %d)", info.array.raid_disks); + fprintf(stderr, ", but not started.\n"); + if (must_close) close(mdfd); return 0; } - fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); - if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); - if (!enough(first_super.level, first_super.raid_disks, okcnt)) - fprintf(stderr, " - not enough to start the array.\n"); - else { - if (req_cnt == first_super.raid_disks) - fprintf(stderr, " - need all %d to start it", req_cnt); - else - fprintf(stderr, " - need %d of %d to start", req_cnt, first_super.raid_disks); - fprintf(stderr, " (use --run to insist).\n"); + if (verbose >= -1) { + fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); + if (sparecnt) + fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + if (!enough(info.array.level, info.array.raid_disks, + info.array.layout, 1, avail, okcnt)) + fprintf(stderr, " - not enough to start the array.\n"); + else if (!enough(info.array.level, + info.array.raid_disks, + info.array.layout, clean, + avail, okcnt)) + fprintf(stderr, " - not enough to start the " + "array while not clean - consider " + "--force.\n"); + else { + if (req_cnt == info.array.raid_disks) + fprintf(stderr, " - need all %d to start it", req_cnt); + else + fprintf(stderr, " - need %d of %d to start", req_cnt, info.array.raid_disks); + fprintf(stderr, " (use --run to insist).\n"); + } + } + if (must_close) { + ioctl(mdfd, STOP_ARRAY, NULL); + close(mdfd); } return 1; } else { @@ -659,13 +1060,14 @@ This doesnt work yet * so we can just start the array */ unsigned long dev; - dev = MKDEV(devices[chosen_drive].major, - devices[chosen_drive].minor); + dev = makedev(devices[chosen_drive].i.disk.major, + devices[chosen_drive].i.disk.minor); if (ioctl(mdfd, START_ARRAY, dev)) { fprintf(stderr, Name ": Cannot start array: %s\n", strerror(errno)); } - + } + if (must_close) close(mdfd); return 0; }