X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Incremental.c;h=bcbd78d0b4abe67ec2069922936be39447a609c5;hp=aa9eb88dbf8fb652fad2787dc6af0757f2acb901;hb=0f22b998fb9cf8478810b89cd50fa5b4fbf11d38;hpb=f35f25259279573c6274e2783536c0b0a399bdd4 diff --git a/Incremental.c b/Incremental.c index aa9eb88d..bcbd78d0 100644 --- a/Incremental.c +++ b/Incremental.c @@ -2,7 +2,7 @@ * Incremental.c - support --incremental. Part of: * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006 Neil Brown + * Copyright (C) 2006-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -35,9 +35,12 @@ static int count_active(struct supertype *st, int mdfd, char **availp, static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra, int number, __u64 events, int verbose, char *array_name); +static int try_spare(char *devname, int *dfdp, struct dev_policy *pol, + struct supertype *st, int verbose); int Incremental(char *devname, int verbose, int runstop, - struct supertype *st, char *homehost, int autof) + struct supertype *st, char *homehost, int require_homehost, + int autof) { /* Add this device to an array, creating the array if necessary * and starting the array if sensible or - if runstop>0 - if possible. @@ -48,7 +51,8 @@ int Incremental(char *devname, int verbose, int runstop, * 2/ Find metadata, reject if none appropriate (check * version/name from args) * 3/ Check if there is a match in mdadm.conf - * 3a/ if not, check for homehost match. If no match, reject. + * 3a/ if not, check for homehost match. If no match, assemble as + * a 'foreign' array. * 4/ Determine device number. * - If in mdadm.conf with std name, use that * - UUID in /var/run/mdadm.map use that @@ -56,6 +60,7 @@ int Incremental(char *devname, int verbose, int runstop, * - Choose a free, high number. * - Use a partitioned device unless strong suggestion not to. * e.g. 
auto=md + * Don't choose partitioned for containers. * 5/ Find out if array already exists * 5a/ if it does not * - choose a name, from mdadm.conf or 'name' field in array. @@ -67,6 +72,7 @@ int Incremental(char *devname, int verbose, int runstop, * - add the device * 6/ Make sure /var/run/mdadm.map contains this array. * 7/ Is there enough devices to possibly start the array? + * For a container, this means running Incremental_container. * 7a/ if not, finish with success. * 7b/ if yes, * - read all metadata and arrange devices like -A does @@ -74,30 +80,30 @@ int Incremental(char *devname, int verbose, int runstop, * start the array (auto-readonly). */ struct stat stb; - struct mdinfo info; + struct mdinfo info, dinfo; struct mddev_ident_s *array_list, *match; char chosen_name[1024]; - int rv; - int devnum; + int rv = 1; struct map_ent *mp, *map = NULL; - int dfd, mdfd; + int dfd = -1, mdfd = -1; char *avail; int active_disks; - + int trustworthy = FOREIGN; + char *name_to_use; + mdu_array_info_t ainf; + struct dev_policy *policy = NULL; struct createinfo *ci = conf_get_create_info(); - if (autof == 0) - autof = ci->autof; - /* 1/ Check if devices is permitted by mdadm.conf */ + /* 1/ Check if device is permitted by mdadm.conf */ if (!conf_test_dev(devname)) { if (verbose >= 0) fprintf(stderr, Name ": %s not permitted by mdadm.conf.\n", devname); - return 1; + goto out; } /* 2/ Find metadata, reject if none appropriate (check @@ -108,47 +114,44 @@ int Incremental(char *devname, int verbose, int runstop, if (verbose >= 0) fprintf(stderr, Name ": cannot open %s: %s.\n", devname, strerror(errno)); - return 1; + goto out; } if (fstat(dfd, &stb) < 0) { if (verbose >= 0) fprintf(stderr, Name ": fstat failed for %s: %s.\n", devname, strerror(errno)); - close(dfd); - return 1; + goto out; } if ((stb.st_mode & S_IFMT) != S_IFBLK) { if (verbose >= 0) fprintf(stderr, Name ": %s is not a block device.\n", devname); - close(dfd); - return 1; + goto out; } + dinfo.disk.major = 
major(stb.st_rdev); + dinfo.disk.minor = minor(stb.st_rdev); + + policy = disk_policy(&dinfo); + if (st == NULL && (st = guess_super(dfd)) == NULL) { if (verbose >= 0) fprintf(stderr, Name ": no recognisable superblock on %s.\n", devname); - close(dfd); - return 1; + rv = try_spare(devname, &dfd, policy, st, verbose); + goto out; } - if (st->ss->load_super(st, dfd, NULL)) { + if (st->ss->compare_super == NULL || + st->ss->load_super(st, dfd, NULL)) { if (verbose >= 0) fprintf(stderr, Name ": no RAID superblock on %s.\n", devname); - close(dfd); - return 1; - } - close (dfd); - - if (st->ss->container_content) { - /* This is a pre-built container array, so we do something - * rather different. - */ - return Incremental_container(st, devname, verbose, runstop, - autof); + rv = try_spare(devname, &dfd, policy, st, verbose); + free(st); + goto out; } + close (dfd); dfd = -1; memset(&info, 0, sizeof(info)); st->ss->getinfo_super(st, &info); @@ -160,7 +163,7 @@ int Incremental(char *devname, int verbose, int runstop, if (array_list->uuid_set && same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid) == 0) { - if (verbose >= 2) + if (verbose >= 2 && array_list->devname) fprintf(stderr, Name ": UUID differs from %s.\n", array_list->devname); @@ -168,7 +171,7 @@ int Incremental(char *devname, int verbose, int runstop, } if (array_list->name[0] && strcasecmp(array_list->name, info.name) != 0) { - if (verbose >= 2) + if (verbose >= 2 && array_list->devname) fprintf(stderr, Name ": Name differs from %s.\n", array_list->devname); @@ -176,7 +179,7 @@ int Incremental(char *devname, int verbose, int runstop, } if (array_list->devices && !match_oneof(array_list->devices, devname)) { - if (verbose >= 2) + if (verbose >= 2 && array_list->devname) fprintf(stderr, Name ": Not a listed device for %s.\n", array_list->devname); @@ -184,7 +187,7 @@ int Incremental(char *devname, int verbose, int runstop, } if (array_list->super_minor != UnSet && array_list->super_minor != 
info.array.md_minor) { - if (verbose >= 2) + if (verbose >= 2 && array_list->devname) fprintf(stderr, Name ": Different super-minor to %s.\n", array_list->devname); @@ -194,7 +197,7 @@ int Incremental(char *devname, int verbose, int runstop, !array_list->name[0] && !array_list->devices && array_list->super_minor == UnSet) { - if (verbose >= 2) + if (verbose >= 2 && array_list->devname) fprintf(stderr, Name ": %s doesn't have any identifying information.\n", array_list->devname); @@ -203,123 +206,144 @@ int Incremental(char *devname, int verbose, int runstop, /* FIXME, should I check raid_disks and level too?? */ if (match) { - if (verbose >= 0) - fprintf(stderr, Name + if (verbose >= 0) { + if (match->devname && array_list->devname) + fprintf(stderr, Name ": we match both %s and %s - cannot decide which to use.\n", - match->devname, array_list->devname); - return 2; + match->devname, array_list->devname); + else + fprintf(stderr, Name + ": multiple lines in mdadm.conf match\n"); + } + rv = 2; + goto out; } match = array_list; } - /* 3a/ if not, check for homehost match. If no match, reject. */ - if (!match) { - if (homehost == NULL || - st->ss->match_home(st, homehost) == 0) { - if (verbose >= 0) - fprintf(stderr, Name - ": not found in mdadm.conf and not identified by homehost.\n"); - return 2; - } + if (match && match->devname + && strcasecmp(match->devname, "") == 0) { + if (verbose >= 0) + fprintf(stderr, Name ": array containing %s is explicitly" + " ignored by mdadm.conf\n", + devname); + goto out; } - /* 4/ Determine device number. */ - /* - If in mdadm.conf with std name, use that */ - /* - UUID in /var/run/mdadm.map use that */ - /* - If name is suggestive, use that. unless in use with */ - /* different uuid. */ - /* - Choose a free, high number. */ - /* - Use a partitioned device unless strong suggestion not to. */ - /* e.g. 
auto=md */ - if (match && is_standard(match->devname, &devnum)) - /* We have devnum now */; - else if ((mp = map_by_uuid(&map, info.uuid)) != NULL) - devnum = mp->devnum; - else { - /* Have to guess a bit. */ - int use_partitions = 1; - char *np, *ep; - char *nm, nbuf[1024]; - if ((autof&7) == 3 || (autof&7) == 5) - use_partitions = 0; - np = strchr(info.name, ':'); - if (np) - np++; - else - np = info.name; - devnum = strtoul(np, &ep, 10); - if (ep > np && *ep == 0) { - /* This is a number. Let check that it is unused. */ - if (mddev_busy(use_partitions ? (-1-devnum) : devnum)) - devnum = -1; - } else - devnum = -1; - if (match) - nm = match->devname; + /* 3a/ if not, check for homehost match. If no match, continue + * but don't trust the 'name' in the array. Thus a 'random' minor + * number will be assigned, and the device name will be based + * on that. */ + if (match) + trustworthy = LOCAL; + else if (st->ss->match_home(st, homehost) == 1) + trustworthy = LOCAL; + else if (st->ss->match_home(st, "any") == 1) + trustworthy = LOCAL_ANY; + else + trustworthy = FOREIGN; + + + if (!match && !conf_test_metadata(st->ss->name, + (trustworthy == LOCAL))) { + if (verbose >= 1) + fprintf(stderr, Name + ": %s has metadata type %s for which " + "auto-assembly is disabled\n", + devname, st->ss->name); + goto out; + } + if (trustworthy == LOCAL_ANY) + trustworthy = LOCAL; + + /* There are three possible sources for 'autof': command line, + * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf. + * ARRAY takes precedence, then command line, then + * CREATE. + */ + if (match && match->autof) + autof = match->autof; + if (autof == 0) + autof = ci->autof; + + if (st->ss->container_content && st->loaded_container) { + if ((runstop > 0 && info.container_enough >= 0) || + info.container_enough > 0) + /* pass */; else { - sprintf(nbuf, "/dev/md/%s", np); - nm = nbuf; - } - if (stat(nm, &stb) == 0 && - S_ISBLK(stb.st_mode) && - major(stb.st_rdev) == (use_partitions ? 
- get_mdp_major() : MD_MAJOR)) { - if (use_partitions) - devnum = minor(stb.st_rdev) >> MdpMinorShift; - else - devnum = minor(stb.st_rdev); - if (mddev_busy(use_partitions ? (-1-devnum) : devnum)) - devnum = -1; + if (verbose) + fprintf(stderr, Name ": not enough devices to start the container\n"); + rv = 0; + goto out; } - if (devnum < 0) { - /* Haven't found anything yet, choose something free */ - devnum = find_free_devnum(use_partitions); - - if (devnum == NoMdDev) { - fprintf(stderr, Name - ": No spare md devices!!\n"); - return 2; - } - } else - devnum = use_partitions ? (-1-devnum) : devnum; + /* This is a pre-built container array, so we do something + * rather different. + */ + rv = Incremental_container(st, devname, verbose, runstop, + autof, trustworthy); + goto out; } - mdfd = open_mddev_devnum(match ? match->devname : NULL, - devnum, - info.name, - chosen_name, autof >> 3); - if (mdfd < 0) { - fprintf(stderr, Name ": failed to open %s: %s.\n", - chosen_name, strerror(errno)); - return 2; + + name_to_use = info.name; + if (name_to_use[0] == 0 && + info.array.level == LEVEL_CONTAINER && + trustworthy == LOCAL) { + name_to_use = info.text_version; + trustworthy = METADATA; } - sysfs_init(&info, mdfd, 0); + if (name_to_use[0] && trustworthy != LOCAL && + ! require_homehost && + conf_name_is_free(name_to_use)) + trustworthy = LOCAL; - /* 5/ Find out if array already exists */ - if (! mddev_busy(devnum)) { - /* 5a/ if it does not */ - /* - choose a name, from mdadm.conf or 'name' field in array. */ - /* - create the array */ - /* - add the device */ + /* strip "hostname:" prefix from name if we have decided + * to treat it as LOCAL + */ + if (trustworthy == LOCAL && strchr(name_to_use, ':') != NULL) + name_to_use = strchr(name_to_use, ':')+1; + + /* 4/ Check if array exists. 
+ */ + if (map_lock(&map)) + fprintf(stderr, Name ": failed to get exclusive lock on " + "mapfile\n"); + mp = map_by_uuid(&map, info.uuid); + if (mp) + mdfd = open_dev(mp->devnum); + else + mdfd = -1; + + if (mdfd < 0) { struct mdinfo *sra; + /* Couldn't find an existing array, maybe make a new one */ + mdfd = create_mddev(match ? match->devname : NULL, + name_to_use, autof, trustworthy, chosen_name); + + if (mdfd < 0) + goto out; + + sysfs_init(&info, mdfd, 0); + if (set_array_info(mdfd, st, &info) != 0) { fprintf(stderr, Name ": failed to set array info for %s: %s\n", chosen_name, strerror(errno)); - close(mdfd); - return 2; + rv = 2; + goto out; } - info.disk.major = major(stb.st_rdev); - info.disk.minor = minor(stb.st_rdev); - if (add_disk(mdfd, st, &info, &info) != 0) { + dinfo = info; + dinfo.disk.major = major(stb.st_rdev); + dinfo.disk.minor = minor(stb.st_rdev); + if (add_disk(mdfd, st, &info, &dinfo) != 0) { fprintf(stderr, Name ": failed to add %s to %s: %s.\n", devname, chosen_name, strerror(errno)); ioctl(mdfd, STOP_ARRAY, 0); - close(mdfd); - return 2; + rv = 2; + goto out; } - sra = sysfs_read(mdfd, devnum, GET_DEVS); + sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS); if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) { /* It really should be 'none' - must be old buggy * kernel, and mdadm -I may not be able to complete. @@ -329,11 +353,16 @@ int Incremental(char *devname, int verbose, int runstop, fprintf(stderr, Name ": You have an old buggy kernel which cannot support\n" " --incremental reliably. Aborting.\n"); - close(mdfd); sysfs_free(sra); - return 2; + rv = 2; + goto out; } + info.array.working_disks = 1; sysfs_free(sra); + /* 6/ Make sure /var/run/mdadm.map contains this array. 
*/ + map_update(&map, fd2devnum(mdfd), + info.text_version, + info.uuid, chosen_name); } else { /* 5b/ if it does */ /* - check one drive in array to make sure metadata is a reasonably */ @@ -344,61 +373,123 @@ int Incremental(char *devname, int verbose, int runstop, int err; struct mdinfo *sra; struct supertype *st2; - struct mdinfo info2; - sra = sysfs_read(mdfd, devnum, (GET_DEVS | GET_STATE)); - - sprintf(dn, "%d:%d", sra->devs->disk.major, - sra->devs->disk.minor); - dfd2 = dev_open(dn, O_RDONLY); - st2 = dup_super(st); - if (st2->ss->load_super(st2, dfd2, NULL) || - st->ss->compare_super(st, st2) != 0) { - fprintf(stderr, Name - ": metadata mismatch between %s and " - "chosen array %s\n", - devname, chosen_name); - close(mdfd); - close(dfd2); - return 2; + struct mdinfo info2, *d; + + if (mp->path) + strcpy(chosen_name, mp->path); + else + strcpy(chosen_name, devnum2devname(mp->devnum)); + + /* It is generally not OK to add non-spare drives to a + * running array as they are probably missing because + * they failed. However if runstop is 1, then the + * array was possibly started early and our best bet is + * to add this anyway. 
+ * Also if action policy is re-add or better we allow + * re-add + */ + if ((info.disk.state & (1<ss->name, + act_re_add) + && runstop < 1) { + int active = 0; + + if (st->ss->external) { + char *devname = devnum2devname(fd2devnum(mdfd)); + + active = devname && is_container_active(devname); + free(devname); + } else if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) + active = 1; + if (active) { + fprintf(stderr, Name + ": not adding %s to active array (without --run) %s\n", + devname, chosen_name); + rv = 2; + goto out; + } } - close(dfd2); - memset(&info2, 0, sizeof(info2)); - st2->ss->getinfo_super(st2, &info2); - st2->ss->free_super(st2); - if (info.array.level != info2.array.level || - memcmp(info.uuid, info2.uuid, 16) != 0 || - info.array.raid_disks != info2.array.raid_disks) { - fprintf(stderr, Name - ": unexpected difference between %s and %s.\n", - chosen_name, devname); - close(mdfd); - return 2; + sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE)); + if (!sra) { + rv = 2; + goto out; + } + if (sra->devs) { + sprintf(dn, "%d:%d", sra->devs->disk.major, + sra->devs->disk.minor); + dfd2 = dev_open(dn, O_RDONLY); + st2 = dup_super(st); + if (st2->ss->load_super(st2, dfd2, NULL) || + st->ss->compare_super(st, st2) != 0) { + fprintf(stderr, Name + ": metadata mismatch between %s and " + "chosen array %s\n", + devname, chosen_name); + close(dfd2); + rv = 2; + goto out; + } + close(dfd2); + memset(&info2, 0, sizeof(info2)); + st2->ss->getinfo_super(st2, &info2); + st2->ss->free_super(st2); + if (info.array.level != info2.array.level || + memcmp(info.uuid, info2.uuid, 16) != 0 || + info.array.raid_disks != info2.array.raid_disks) { + fprintf(stderr, Name + ": unexpected difference between %s and %s.\n", + chosen_name, devname); + rv = 2; + goto out; + } } info2.disk.major = major(stb.st_rdev); info2.disk.minor = minor(stb.st_rdev); - err = add_disk(mdfd, st2, sra, &info2); + /* add disk needs to know about containers */ + if (st->ss->external) + 
sra->array.level = LEVEL_CONTAINER; + err = add_disk(mdfd, st, sra, &info2); if (err < 0 && errno == EBUSY) { /* could be another device present with the same * disk.number. Find and reject any such */ find_reject(mdfd, st, sra, info.disk.number, info.events, verbose, chosen_name); - err = add_disk(mdfd, st2, sra, &info2); + err = add_disk(mdfd, st, sra, &info2); } if (err < 0) { fprintf(stderr, Name ": failed to add %s to %s: %s.\n", devname, chosen_name, strerror(errno)); - close(mdfd); - return 2; + rv = 2; + goto out; } + info.array.working_disks = 0; + for (d = sra->devs; d; d=d->next) + info.array.working_disks ++; + } - /* 6/ Make sure /var/run/mdadm.map contains this array. */ - map_update(&map, devnum, - info.text_version, - info.uuid, chosen_name); /* 7/ Is there enough devices to possibly start the array? */ /* 7a/ if not, finish with success. */ + if (info.array.level == LEVEL_CONTAINER) { + /* Try to assemble within the container */ + map_unlock(&map); + sysfs_uevent(&info, "change"); + if (verbose >= 0) + fprintf(stderr, Name + ": container %s now has %d devices\n", + chosen_name, info.array.working_disks); + wait_for(chosen_name, mdfd); + close(mdfd); + rv = Incremental(chosen_name, verbose, runstop, + NULL, homehost, require_homehost, autof); + if (rv == 1) + /* Don't fail the whole -I if a subarray didn't + * have enough devices to start yet + */ + rv = 0; + return rv; + } avail = NULL; active_disks = count_active(st, mdfd, &avail, &info); if (enough(info.array.level, info.array.raid_disks, @@ -409,8 +500,9 @@ int Incremental(char *devname, int verbose, int runstop, fprintf(stderr, Name ": %s attached to %s, not enough to start (%d).\n", devname, chosen_name, active_disks); - close(mdfd); - return 0; + map_unlock(&map); + rv = 0; + goto out; } free(avail); @@ -419,20 +511,20 @@ int Incremental(char *devname, int verbose, int runstop, /* are enough, */ /* + add any bitmap file */ /* + start the array (auto-readonly). 
*/ -{ - mdu_array_info_t ainf; if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) { if (verbose >= 0) fprintf(stderr, Name ": %s attached to %s which is already active.\n", devname, chosen_name); - close (mdfd); - return 0; + map_unlock(&map); + rv = 0; + goto out; } -} + + map_unlock(&map); if (runstop > 0 || active_disks >= info.array.working_disks) { - struct mdinfo *sra; + struct mdinfo *sra, *dsk; /* Let's try to start it */ if (match && match->bitmap_file) { int bmfd = open(match->bitmap_file, O_RDWR); @@ -440,21 +532,22 @@ int Incremental(char *devname, int verbose, int runstop, fprintf(stderr, Name ": Could not open bitmap file %s.\n", match->bitmap_file); - close(mdfd); - return 1; + goto out; } if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) { close(bmfd); fprintf(stderr, Name ": Failed to set bitmapfile for %s.\n", chosen_name); - close(mdfd); - return 1; + goto out; } close(bmfd); } - sra = sysfs_read(mdfd, devnum, 0); - if (sra == NULL || active_disks >= info.array.working_disks) + /* GET_* needed so add_disk works below */ + sra = sysfs_read(mdfd, fd2devnum(mdfd), + GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE); + if ((sra == NULL || active_disks >= info.array.working_disks) + && trustworthy != FOREIGN) rv = ioctl(mdfd, RUN_ARRAY, NULL); else rv = sysfs_set_str(sra, NULL, @@ -462,9 +555,23 @@ int Incremental(char *devname, int verbose, int runstop, if (rv == 0) { if (verbose >= 0) fprintf(stderr, Name - ": %s attached to %s, which has been started.\n", + ": %s attached to %s, which has been started.\n", devname, chosen_name); rv = 0; + wait_for(chosen_name, mdfd); + /* We just started the array, so some devices + * might have been evicted from the array + * because their event counts were too old. + * If the action=re-add policy is in-force for + * those devices we should re-add them now. 
+ */ + for (dsk = sra->devs; dsk ; dsk = dsk->next) { + if (disk_action_allows(dsk, st->ss->name, act_re_add) && + add_disk(mdfd, st, sra, dsk) == 0) + fprintf(stderr, Name + ": %s re-added to %s\n", + dsk->sys_name, chosen_name); + } } else { fprintf(stderr, Name ": %s attached to %s, but failed to start: %s.\n", @@ -478,7 +585,13 @@ int Incremental(char *devname, int verbose, int runstop, devname, chosen_name); rv = 0; } - close(mdfd); +out: + if (dfd >= 0) + close(dfd); + if (mdfd >= 0) + close(mdfd); + if (policy) + dev_policy_free(policy); return rv; } @@ -536,6 +649,9 @@ static int count_active(struct supertype *st, int mdfd, char **availp, struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE); char *avail = NULL; + if (!sra) + return 0; + for (d = sra->devs ; d ; d = d->next) { char dn[30]; int dfd; @@ -551,12 +667,18 @@ static int count_active(struct supertype *st, int mdfd, char **availp, if (ok != 0) continue; st->ss->getinfo_super(st, &info); + if (!avail) { + avail = malloc(info.array.raid_disks); + if (!avail) { + fprintf(stderr, Name ": out of memory.\n"); + exit(1); + } + memset(avail, 0, info.array.raid_disks); + *availp = avail; + } + if (info.disk.state & (1<next) { - struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS); - struct mdinfo *sd; - - for (sd = sra->devs ; sd ; sd = sd->next) { - char dn[30]; - int dfd; - int ok; - struct supertype *st; - char *path; - struct mdinfo info; - - sprintf(dn, "%d:%d", sd->disk.major, sd->disk.minor); - dfd = dev_open(dn, O_RDONLY); - if (dfd < 0) + /* This device doesn't have any md metadata + * If it is 'bare' and the device policy allows 'spare' look for + * an array or container to attach it to. + * If st is set, then only arrays of that type are considered + * Return 0 on success, or some exit code on failure, probably 1.
+ */ + int rv = -1; + char bufpad[4096 + 4096]; + char *buf = (char*)(((long)bufpad + 4096) & ~4095); + struct stat stb; + struct map_ent *mp, *map = NULL; + struct mdinfo *chosen = NULL; + int dfd = *dfdp; + + /* First check policy */ + if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare)) + return 1; + + if (fstat(dfd, &stb) != 0) + return 1; + /* Now check if the device is bare - we don't add non-bare devices + * yet even if action=-spare + */ + + if (lseek(dfd, 0, SEEK_SET) != 0 || + read(dfd, buf, 4096) != 4096) { + not_bare: + if (verbose > 1) + fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n", + devname); + return 1; + } + if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff') + goto not_bare; + if (memcmp(buf, buf+1, 4095) != 0) + goto not_bare; + + /* OK, first 4K appear blank, try the end. */ + if (lseek(dfd, -4096, SEEK_END) < 0 || + read(dfd, buf, 4096) != 4096) + goto not_bare; + + if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff') + goto not_bare; + if (memcmp(buf, buf+1, 4095) != 0) + goto not_bare; + + /* This device passes our test for 'is bare'. + * Now we need to find a suitable array to add this to. 
+ * We only accept arrays that: + * - match 'st' + * - are in the same domains as the device + * - are of a size for which the device will be useful + * and we choose the one that is the most degraded + */ + + if (map_lock(&map)) { + fprintf(stderr, Name ": failed to get exclusive lock on " + "mapfile\n"); + return 1; + } + for (mp = map ; mp ; mp = mp->next) { + struct supertype *st2; + struct domainlist *dl = NULL; + struct mdinfo *sra; + unsigned long long devsize; + + if (is_subarray(mp->metadata)) + continue; + if (st) { + st2 = st->ss->match_metadata_desc(mp->metadata); + if (!st2 || + (st->minor_version >= 0 && + st->minor_version != st2->minor_version)) { + if (verbose > 1) + fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n", + devname, mp->path); + free(st2); continue; - st = guess_super(dfd); - if ( st == NULL) - ok = -1; - else - ok = st->ss->load_super(st, dfd, NULL); + } + free(st2); + } + sra = sysfs_read(-1, mp->devnum, + GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE| + GET_DEGRADED|GET_COMPONENT|GET_VERSION); + if (!sra) { + /* Probably a container - no degraded info */ + sra = sysfs_read(-1, mp->devnum, + GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE| + GET_COMPONENT|GET_VERSION); + if (sra) + sra->array.failed_disks = 0; + } + if (!sra) + continue; + if (st == NULL) { + int i; + st2 = NULL; + for(i=0; !st2 && superlist[i]; i++) + st2 = superlist[i]->match_metadata_desc( + sra->text_version); + } else + st2 = st; + get_dev_size(dfd, NULL, &devsize); + if (st2->ss->avail_size(st2, devsize) < sra->component_size) { + if (verbose > 1) + fprintf(stderr, Name ": not adding %s to %s as it is too small\n", + devname, mp->path); + goto next; + } + dl = domain_from_array(sra, st2->ss->name); + if (!domain_test(dl, pol, st2->ss->name)) { + /* domain test fails */ + if (verbose > 1) + fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n", + devname, mp->path); + + goto next; + } + /* all tests passed, OK to add to
this array */ + if (!chosen) { + chosen = sra; + sra = NULL; + } else if (chosen->array.failed_disks < sra->array.failed_disks) { + sysfs_free(chosen); + chosen = sra; + sra = NULL; + } + next: + if (sra) + sysfs_free(sra); + if (st != st2) + free(st2); + if (dl) + domain_free(dl); + } + if (chosen) { + /* add current device to chosen array as a spare */ + int mdfd = open_dev(devname2devnum(chosen->sys_name)); + if (mdfd >= 0) { + struct mddev_dev_s devlist; + char devname[20]; + devlist.next = NULL; + devlist.used = 0; + devlist.re_add = 0; + devlist.writemostly = 0; + devlist.devname = devname; + sprintf(devname, "%d:%d", major(stb.st_rdev), + minor(stb.st_rdev)); + devlist.disposition = 'a'; close(dfd); - if (ok != 0) - continue; - st->ss->getinfo_super(st, &info); - if (md->devnum > 0) - path = map_dev(MD_MAJOR, md->devnum, 0); + *dfdp = -1; + rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist, + -1, 0); + close(mdfd); + } + if (verbose > 0) { + if (rv == 0) + fprintf(stderr, Name ": added %s as spare for %s\n", + devname, chosen->sys_name); else - path = map_dev(mdp, (-1-md->devnum)<< 6, 0); - map_add(&map, md->devnum, - info.text_version, - info.uuid, path ? : "/unknown"); - st->ss->free_super(st); - break; + fprintf(stderr, Name ": failed to add %s as spare for %s\n", + devname, chosen->sys_name); } + sysfs_free(chosen); } - map_write(map); - map_free(map); + return rv ? 0 : 1; } int IncrementalScan(int verbose) @@ -656,12 +903,11 @@ int IncrementalScan(int verbose) devs = conf_get_ident(NULL); for (me = mapl ; me ; me = me->next) { - char path[1024]; mdu_array_info_t array; mdu_bitmap_file_t bmf; struct mdinfo *sra; - int mdfd = open_mddev_devnum(me->path, me->devnum, - NULL, path, 0); + int mdfd = open_dev(me->devnum); + if (mdfd < 0) continue; if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 || @@ -671,7 +917,8 @@ int IncrementalScan(int verbose) } /* Ok, we can try this one. 
Maybe it needs a bitmap */ for (mddev = devs ; mddev ; mddev = mddev->next) - if (strcmp(mddev->devname, me->path) == 0) + if (mddev->devname && me->path + && devname_matches(mddev->devname, me->path)) break; if (mddev && mddev->bitmap_file) { /* @@ -705,11 +952,12 @@ int IncrementalScan(int verbose) if (verbose >= 0) fprintf(stderr, Name ": started array %s\n", - me->path); + me->path ?: devnum2devname(me->devnum)); } else { fprintf(stderr, Name ": failed to start array %s: %s\n", - me->path, strerror(errno)); + me->path ?: devnum2devname(me->devnum), + strerror(errno)); rv = 1; } } @@ -719,19 +967,31 @@ int IncrementalScan(int verbose) static char *container2devname(char *devname) { - int fd = open(devname, O_RDONLY); char *mdname = NULL; - if (fd >= 0) { - mdname = devnum2devname(fd2devnum(fd)); - close(fd); + if (devname[0] == '/') { + int fd = open(devname, O_RDONLY); + if (fd >= 0) { + mdname = devnum2devname(fd2devnum(fd)); + close(fd); + } + } else { + int uuid[4]; + struct map_ent *mp, *map = NULL; + + if (!parse_uuid(devname, uuid)) + return mdname; + mp = map_by_uuid(&map, uuid); + if (mp) + mdname = devnum2devname(mp->devnum); + map_free(map); } return mdname; } int Incremental_container(struct supertype *st, char *devname, int verbose, - int runstop, int autof) + int runstop, int autof, int trustworthy) { /* Collect the contents of this container and for each * array, choose a device name and assemble the array. 
@@ -739,67 +999,83 @@ int Incremental_container(struct supertype *st, char *devname, int verbose, struct mdinfo *list = st->ss->container_content(st); struct mdinfo *ra; - char *mdname = container2devname(devname); + struct map_ent *map = NULL; - if (!mdname) { - fprintf(stderr, Name": failed to determine device name\n"); - return 2; - } + if (map_lock(&map)) + fprintf(stderr, Name ": failed to get exclusive lock on " + "mapfile\n"); for (ra = list ; ra ; ra = ra->next) { - struct mdinfo *dev; - int devnum = -1; int mdfd; char chosen_name[1024]; - int usepart = 1; - char *n; - int working = 0; - - if ((autof&7) == 3 || (autof&7) == 5) - usepart = 0; - - n = ra->name; - if (*n == 'd') - n++; - if (*n) { - devnum = strtoul(n, &n, 10); - if (devnum >= 0 && (*n == 0 || *n == ' ')) { - /* Use this devnum */ - usepart = (ra->name[0] == 'd'); - if (mddev_busy(usepart ? (-1-devnum) : devnum)) - devnum = -1; + struct map_ent *mp; + struct mddev_ident_s *match = NULL; + + mp = map_by_uuid(&map, ra->uuid); + + if (mp) { + mdfd = open_dev(mp->devnum); + if (mp->path) + strcpy(chosen_name, mp->path); + else + strcpy(chosen_name, devnum2devname(mp->devnum)); + } else { + + /* Check in mdadm.conf for container == devname and + * member == ra->text_version after second slash. + */ + char *sub = strchr(ra->text_version+1, '/'); + struct mddev_ident_s *array_list; + if (sub) { + sub++; + array_list = conf_get_ident(NULL); } else - devnum = -1; - } - - if (devnum < 0) { - char *nm = ra->name; - char nbuf[1024]; - struct stat stb; - if (strchr(nm, ':')) - nm = strchr(nm, ':')+1; - sprintf(nbuf, "/dev/md/%s", nm); - - if (stat(nbuf, &stb) == 0 && - S_ISBLK(stb.st_mode) && - major(stb.st_rdev) == (usepart ? - get_mdp_major() : MD_MAJOR)){ - if (usepart) - devnum = minor(stb.st_rdev) - >> MdpMinorShift; - else - devnum = minor(stb.st_rdev); - if (mddev_busy(usepart ? 
(-1-devnum) : devnum)) - devnum = -1; + array_list = NULL; + for(; array_list ; array_list = array_list->next) { + char *dn; + if (array_list->member == NULL || + array_list->container == NULL) + continue; + if (strcmp(array_list->member, sub) != 0) + continue; + if (array_list->uuid_set && + !same_uuid(ra->uuid, array_list->uuid, st->ss->swapuuid)) + continue; + dn = container2devname(array_list->container); + if (dn == NULL) + continue; + if (strncmp(dn, ra->text_version+1, + strlen(dn)) != 0 || + ra->text_version[strlen(dn)+1] != '/') { + free(dn); + continue; + } + free(dn); + /* we have a match */ + match = array_list; + if (verbose>0) + fprintf(stderr, Name ": match found for member %s\n", + array_list->member); + break; } - } - if (devnum >= 0) - devnum = usepart ? (-1-devnum) : devnum; - else - devnum = find_free_devnum(usepart); - mdfd = open_mddev_devnum(NULL, devnum, ra->name, - chosen_name, autof>>3); + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (verbose > 0) + fprintf(stderr, Name ": array %s/%s is " + "explicitly ignored by mdadm.conf\n", + match->container, match->member); + return 2; + } + if (match) + trustworthy = LOCAL; + + mdfd = create_mddev(match ? match->devname : NULL, + ra->name, + autof, + trustworthy, + chosen_name); + } if (mdfd < 0) { fprintf(stderr, Name ": failed to open %s: %s.\n", @@ -807,40 +1083,51 @@ int Incremental_container(struct supertype *st, char *devname, int verbose, return 2; } - sysfs_init(ra, mdfd, 0); - sysfs_set_array(ra, md_get_version(mdfd)); - for (dev = ra->devs; dev; dev = dev->next) - if (sysfs_add_disk(ra, dev) == 0) - working++; - - if (runstop > 0 || working >= ra->array.working_disks) { - switch(ra->array.level) { - case LEVEL_LINEAR: - case LEVEL_MULTIPATH: - case 0: - sysfs_set_str(ra, NULL, "array_state", - "active"); - break; - default: - sysfs_set_str(ra, NULL, "array_state", - "readonly"); - /* start mdmon if needed. 
*/ - if (!mdmon_running(st->container_dev)) - start_mdmon(st->container_dev); - ping_monitor(devnum2devname(st->container_dev)); - break; - } - sysfs_set_safemode(ra, ra->safe_mode_delay); - if (verbose >= 0) - printf("Started %s with %d devices\n", - chosen_name, working); - /* FIXME should have an O_EXCL and wait for read-auto */ - } else - if (verbose >= 0) - printf("%s assembled with %d devices but " - "not started\n", - chosen_name, working); - close(mdfd); + assemble_container_content(st, mdfd, ra, runstop, + chosen_name, verbose); } + map_unlock(&map); return 0; } + +/* + * IncrementalRemove - Attempt to see if the passed in device belongs to any + * raid arrays, and if so first fail (if needed) and then remove the device. + * + * @devname - The device we want to remove + * + * Note: the device name must be a kernel name like "sda", so + * that we can find it in /proc/mdstat + */ +int IncrementalRemove(char *devname, int verbose) +{ + int mdfd; + int rv; + struct mdstat_ent *ent; + struct mddev_dev_s devlist; + + if (strchr(devname, '/')) { + fprintf(stderr, Name ": incremental removal requires a " + "kernel device name, not a file: %s\n", devname); + return 1; + } + ent = mdstat_by_component(devname); + if (!ent) { + fprintf(stderr, Name ": %s does not appear to be a component " + "of any array\n", devname); + return 1; + } + mdfd = open_dev(ent->devnum); + if (mdfd < 0) { + fprintf(stderr, Name ": Cannot open array %s!!\n", ent->dev); + return 1; + } + memset(&devlist, 0, sizeof(devlist)); + devlist.devname = devname; + devlist.disposition = 'f'; + Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0); + devlist.disposition = 'r'; + rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0); + close(mdfd); + return rv; +}