X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Incremental.c;h=91301eb5e60997a18333f523bd48fd979e069b8e;hb=7105228e190870505b1d9163c104da9bdb42aa59;hp=bc23a885d5e404358646eb2f4a5b0c8d35b40336;hpb=cb8f6859d1f0c18bdac10353e45eb14825b24cd2;p=thirdparty%2Fmdadm.git diff --git a/Incremental.c b/Incremental.c index bc23a885..91301eb5 100644 --- a/Incremental.c +++ b/Incremental.c @@ -2,7 +2,7 @@ * Incremental.c - support --incremental. Part of: * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006-2012 Neil Brown + * Copyright (C) 2006-2013 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -29,6 +29,7 @@ */ #include "mdadm.h" +#include #include #include @@ -43,9 +44,9 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol, struct supertype *st, int verbose); static int Incremental_container(struct supertype *st, char *devname, - struct context *c); + struct context *c, char *only); -int Incremental(char *devname, struct context *c, +int Incremental(struct mddev_dev *devlist, struct context *c, struct supertype *st) { /* Add this device to an array, creating the array if necessary @@ -85,11 +86,12 @@ int Incremental(char *devname, struct context *c, * - if number of OK devices match expected, or -R and there are enough, * start the array (auto-readonly). */ - struct stat stb; + dev_t rdev, rdev2; struct mdinfo info, dinfo; struct mdinfo *sra = NULL, *d; struct mddev_ident *match; char chosen_name[1024]; + char *md_devname; int rv = 1; struct map_ent *mp, *map = NULL; int dfd = -1, mdfd = -1; @@ -97,25 +99,16 @@ int Incremental(char *devname, struct context *c, int active_disks; int trustworthy; char *name_to_use; - mdu_array_info_t ainf; struct dev_policy *policy = NULL; struct map_ent target_array; int have_target; + char *devname = devlist->devname; + int journal_device_missing = 0; struct createinfo *ci = conf_get_create_info(); - if (stat(devname, &stb) < 0) { - if (c->verbose >= 0) - pr_err("stat failed for %s: %s.\n", - devname, strerror(errno)); + if (!stat_is_blkdev(devname, &rdev)) return rv; - } - if ((stb.st_mode & S_IFMT) != S_IFBLK) { - if (c->verbose >= 0) - pr_err("%s is not a block device.\n", - devname); - return rv; - } dfd = dev_open(devname, O_RDONLY); if (dfd < 0) { if (c->verbose >= 0) @@ -127,17 +120,16 @@ int Incremental(char *devname, struct context *c, if (must_be_container(dfd)) { if (!st) st = super_by_fd(dfd, NULL); - if (st) - st->ignore_hw_compat = 1; if (st && st->ss->load_container) rv = st->ss->load_container(st, dfd, NULL); close(dfd); if (!rv && st->ss->container_content) { if (map_lock(&map)) - pr_err("failed to get " - "exclusive lock on mapfile\n"); - rv = Incremental_container(st, devname, c); + pr_err("failed to get exclusive lock on mapfile\n"); + if (c->export) + printf("MD_DEVNAME=%s\n", devname); + rv = Incremental_container(st, devname, c, NULL); map_unlock(&map); return rv; } @@ -149,7 +141,18 @@ int Incremental(char *devname, struct context *c, /* 1/ Check if device is permitted by mdadm.conf */ - if (!conf_test_dev(devname)) { + for (;devlist; devlist = devlist->next) + if (conf_test_dev(devlist->devname)) + break; + if (!devlist) { + devlist = conf_get_devs(); + for (;devlist; devlist = devlist->next) { + if (stat_is_blkdev(devlist->devname, &rdev2) && + rdev2 == rdev) + break; + } + } + if (!devlist) { if (c->verbose >= 0) pr_err("%s not permitted by mdadm.conf.\n", devname); @@ -159,37 +162,28 @@ int Incremental(char *devname, struct context *c, /* 2/ Find metadata, reject if none appropriate (check * version/name from args) */ - if (fstat(dfd, &stb) < 0) { - if (c->verbose >= 0) - pr_err("fstat failed for %s: %s.\n", - devname, strerror(errno)); + if (!fstat_is_blkdev(dfd, devname, &rdev)) goto out; - } - if ((stb.st_mode & S_IFMT) != S_IFBLK) { - if (c->verbose >= 0) - pr_err("%s is not a block device.\n", - devname); - goto out; - } - dinfo.disk.major = major(stb.st_rdev); - dinfo.disk.minor = minor(stb.st_rdev); + dinfo.disk.major = major(rdev); + dinfo.disk.minor = minor(rdev); policy = disk_policy(&dinfo); have_target = policy_check_path(&dinfo, &target_array); - if (st == NULL && (st = guess_super(dfd)) == NULL) { + if (st == NULL && (st = guess_super_type(dfd, guess_array)) == NULL) { if (c->verbose >= 0) pr_err("no recognisable superblock on %s.\n", devname); rv = try_spare(devname, &dfd, policy, have_target ? &target_array : NULL, - st, c->verbose); + NULL, c->verbose); goto out; } - st->ignore_hw_compat = 1; + st->ignore_hw_compat = 0; + if (st->ss->compare_super == NULL || - st->ss->load_super(st, dfd, NULL)) { + st->ss->load_super(st, dfd, c->verbose >= 0 ? devname : NULL)) { if (c->verbose >= 0) pr_err("no RAID superblock on %s.\n", devname); @@ -208,11 +202,10 @@ int Incremental(char *devname, struct context *c, if (!match && rv == 2) goto out; - if (match && match->devname - && strcasecmp(match->devname, "") == 0) { + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { if (c->verbose >= 0) - pr_err("array containing %s is explicitly" - " ignored by mdadm.conf\n", + pr_err("array containing %s is explicitly ignored by mdadm.conf\n", devname); goto out; } @@ -233,8 +226,7 @@ int Incremental(char *devname, struct context *c, if (!match && !conf_test_metadata(st->ss->name, policy, (trustworthy == LOCAL))) { if (c->verbose >= 1) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, st->ss->name); goto out; } @@ -252,8 +244,7 @@ int Incremental(char *devname, struct context *c, c->autof = ci->autof; name_to_use = info.name; - if (name_to_use[0] == 0 && - info.array.level == LEVEL_CONTAINER) { + if (name_to_use[0] == 0 && info.array.level == LEVEL_CONTAINER) { name_to_use = info.text_version; trustworthy = METADATA; } @@ -271,8 +262,7 @@ int Incremental(char *devname, struct context *c, /* 4/ Check if array exists. */ if (map_lock(&map)) - pr_err("failed to get exclusive lock on " - "mapfile\n"); + pr_err("failed to get exclusive lock on mapfile\n"); /* Now check we can get O_EXCL. If not, probably "mdadm -A" has * taken over */ @@ -285,26 +275,39 @@ int Incremental(char *devname, struct context *c, } /* Cannot hold it open while we add the device to the array, * so we must release the O_EXCL and depend on the map_lock() + * So now is the best time to remove any partitions. */ + remove_partitions(dfd); close(dfd); dfd = -1; mp = map_by_uuid(&map, info.uuid); if (mp) - mdfd = open_dev(mp->devnum); + mdfd = open_dev(mp->devnm); else mdfd = -1; if (mdfd < 0) { + /* Skip the clustered ones. This should be started by + * clustering resource agents + */ + if (info.array.state & (1 << MD_SB_CLUSTERED)) + goto out; + /* Couldn't find an existing array, maybe make a new one */ mdfd = create_mddev(match ? match->devname : NULL, - name_to_use, c->autof, trustworthy, chosen_name); + name_to_use, c->autof, trustworthy, chosen_name, 0); if (mdfd < 0) goto out_unlock; - sysfs_init(&info, mdfd, 0); + if (sysfs_init(&info, mdfd, NULL)) { + pr_err("unable to initialize sysfs for %s\n", + chosen_name); + rv = 2; + goto out_unlock; + } if (set_array_info(mdfd, st, &info) != 0) { pr_err("failed to set array info for %s: %s\n", @@ -314,17 +317,17 @@ int Incremental(char *devname, struct context *c, } dinfo = info; - dinfo.disk.major = major(stb.st_rdev); - dinfo.disk.minor = minor(stb.st_rdev); + dinfo.disk.major = major(rdev); + dinfo.disk.minor = minor(rdev); if (add_disk(mdfd, st, &info, &dinfo) != 0) { - pr_err("failed to add %s to %s: %s.\n", + pr_err("failed to add %s to new array %s: %s.\n", devname, chosen_name, strerror(errno)); ioctl(mdfd, STOP_ARRAY, 0); rv = 2; goto out_unlock; } - sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE | - GET_OFFSET | GET_SIZE)); + sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE | + GET_OFFSET | GET_SIZE)); if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) { /* It really should be 'none' - must be old buggy @@ -332,14 +335,13 @@ int Incremental(char *devname, struct context *c, * So reject it. */ ioctl(mdfd, STOP_ARRAY, NULL); - pr_err("You have an old buggy kernel which cannot support\n" - " --incremental reliably. Aborting.\n"); + pr_err("You have an old buggy kernel which cannot support\n --incremental reliably. Aborting.\n"); rv = 2; goto out_unlock; } info.array.working_disks = 1; /* 6/ Make sure /var/run/mdadm.map contains this array. */ - map_update(&map, fd2devnum(mdfd), + map_update(&map, fd2devnm(mdfd), info.text_version, info.uuid, chosen_name); } else { @@ -353,13 +355,13 @@ int Incremental(char *devname, struct context *c, struct supertype *st2; struct mdinfo info2, *d; - sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE | + sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE | GET_OFFSET | GET_SIZE)); if (mp->path) strcpy(chosen_name, mp->path); else - strcpy(chosen_name, devnum2devname(mp->devnum)); + strcpy(chosen_name, mp->devnm); /* It is generally not OK to add non-spare drives to a * running array as they are probably missing because @@ -372,12 +374,11 @@ int Incremental(char *devname, struct context *c, * flag has a different meaning. The test has to happen * at the device level there */ - if (!st->ss->external - && (info.disk.state & (1<ss->name, - act_re_add) - && c->runstop < 1) { - if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) { + if (!st->ss->external && + (info.disk.state & (1 << MD_DISK_SYNC)) != 0 && + !policy_action_allows(policy, st->ss->name, act_re_add) && + c->runstop < 1) { + if (md_array_active(mdfd)) { pr_err("not adding %s to active array (without --run) %s\n", devname, chosen_name); rv = 2; @@ -400,8 +401,7 @@ int Incremental(char *devname, struct context *c, st2 = dup_super(st); if (st2->ss->load_super(st2, dfd2, NULL) || st->ss->compare_super(st, st2) != 0) { - pr_err("metadata mismatch between %s and " - "chosen array %s\n", + pr_err("metadata mismatch between %s and chosen array %s\n", devname, chosen_name); close(dfd2); rv = 2; @@ -419,11 +419,15 @@ int Incremental(char *devname, struct context *c, goto out_unlock; } } - info.disk.major = major(stb.st_rdev); - info.disk.minor = minor(stb.st_rdev); + info.disk.major = major(rdev); + info.disk.minor = minor(rdev); /* add disk needs to know about containers */ if (st->ss->external) sra->array.level = LEVEL_CONTAINER; + + if (info.array.state & (1 << MD_SB_CLUSTERED)) + info.disk.state |= (1 << MD_DISK_CLUSTER_ADD); + err = add_disk(mdfd, st, sra, &info); if (err < 0 && errno == EBUSY) { /* could be another device present with the same @@ -433,8 +437,20 @@ int Incremental(char *devname, struct context *c, info.events, c->verbose, chosen_name); err = add_disk(mdfd, st, sra, &info); } + if (err < 0 && errno == EINVAL && + info.disk.state & (1<ss->name, + act_force_spare)) { + info.disk.state &= ~(1<verbose >= 0) + pr_err("can only add %s to %s as a spare, and force-spare is not set.\n", + devname, chosen_name); + } if (err < 0) { - pr_err("failed to add %s to %s: %s.\n", + pr_err("failed to add %s to existing array %s: %s.\n", devname, chosen_name, strerror(errno)); rv = 2; goto out_unlock; @@ -444,36 +460,40 @@ int Incremental(char *devname, struct context *c, info.array.working_disks ++; } + if (strncmp(chosen_name, "/dev/md/", 8) == 0) + md_devname = chosen_name+8; + else + md_devname = chosen_name; + if (c->export) { + printf("MD_DEVICE=%s\n", fd2devnm(mdfd)); + printf("MD_DEVNAME=%s\n", md_devname); + printf("MD_FOREIGN=%s\n", trustworthy == FOREIGN ? "yes" : "no"); + } /* 7/ Is there enough devices to possibly start the array? */ /* 7a/ if not, finish with success. */ if (info.array.level == LEVEL_CONTAINER) { - int devnum = devnum; /* defined and used iff ->external */ + char devnm[32]; /* Try to assemble within the container */ sysfs_uevent(sra, "change"); - if (c->verbose >= 0) + if (!c->export && c->verbose >= 0) pr_err("container %s now has %d device%s\n", chosen_name, info.array.working_disks, info.array.working_disks == 1?"":"s"); wait_for(chosen_name, mdfd); if (st->ss->external) - devnum = fd2devnum(mdfd); + strcpy(devnm, fd2devnm(mdfd)); if (st->ss->load_container) rv = st->ss->load_container(st, mdfd, NULL); close(mdfd); sysfs_free(sra); if (!rv) - rv = Incremental_container(st, chosen_name, c); + rv = Incremental_container(st, chosen_name, c, NULL); map_unlock(&map); - if (rv == 1) - /* Don't fail the whole -I if a subarray didn't - * have enough devices to start yet - */ - rv = 0; /* after spare is added, ping monitor for external metadata * so that it can eg. try to rebuild degraded array */ if (st->ss->external) - ping_monitor_by_id(devnum); + ping_monitor(devnm); return rv; } @@ -482,13 +502,20 @@ int Incremental(char *devname, struct context *c, * things change. */ sysfs_free(sra); - sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE | + sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE | GET_OFFSET | GET_SIZE)); active_disks = count_active(st, sra, mdfd, &avail, &info); + + journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0); + + if (info.consistency_policy == CONSISTENCY_POLICY_PPL) + info.array.state |= 1; + if (enough(info.array.level, info.array.raid_disks, - info.array.layout, info.array.state & 1, - avail) == 0) { - if (c->verbose >= 0) + info.array.layout, info.array.state & 1, avail) == 0) { + if (c->export) { + printf("MD_STARTED=no\n"); + } else if (c->verbose >= 0) pr_err("%s attached to %s, not enough to start (%d).\n", devname, chosen_name, active_disks); rv = 0; @@ -501,8 +528,10 @@ int Incremental(char *devname, struct context *c, /* + add any bitmap file */ /* + start the array (auto-readonly). */ - if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) { - if (c->verbose >= 0) + if (md_array_active(mdfd)) { + if (c->export) { + printf("MD_STARTED=already\n"); + } else if (c->verbose >= 0) pr_err("%s attached to %s which is already active.\n", devname, chosen_name); rv = 0; @@ -510,15 +539,16 @@ int Incremental(char *devname, struct context *c, } map_unlock(&map); - if (c->runstop > 0 || active_disks >= info.array.working_disks) { + if (c->runstop > 0 || (!journal_device_missing && active_disks >= info.array.working_disks)) { struct mdinfo *dsk; /* Let's try to start it */ + if (journal_device_missing) + pr_err("Trying to run with missing journal device\n"); if (info.reshape_active && !(info.reshape_active & RESHAPE_NO_BACKUP)) { - fprintf(stderr, Name - ": %s: This array is being reshaped and cannot be started\n" - " by --incremental. Please use --assemble\n", - chosen_name); + pr_err("%s: This array is being reshaped and cannot be started\n", + chosen_name); + cont_err("by --incremental. Please use --assemble\n"); goto out; } if (match && match->bitmap_file) { @@ -543,14 +573,20 @@ int Incremental(char *devname, struct context *c, if (d->disk.state & (1<= info.array.working_disks) - && trustworthy != FOREIGN) + if ((sra == NULL || active_disks >= info.array.working_disks) && + trustworthy != FOREIGN) rv = ioctl(mdfd, RUN_ARRAY, NULL); else rv = sysfs_set_str(sra, NULL, "array_state", "read-auto"); + /* Array might be O_EXCL which will interfere with + * fsck and mount. So re-open without O_EXCL. + */ + reopen_mddev(mdfd); if (rv == 0) { - if (c->verbose >= 0) + if (c->export) { + printf("MD_STARTED=yes\n"); + } else if (c->verbose >= 0) pr_err("%s attached to %s, which has been started.\n", devname, chosen_name); rv = 0; @@ -562,7 +598,8 @@ int Incremental(char *devname, struct context *c, * those devices we should re-add them now. */ for (dsk = sra->devs; dsk ; dsk = dsk->next) { - if (disk_action_allows(dsk, st->ss->name, act_re_add) && + if (disk_action_allows(dsk, st->ss->name, + act_re_add) && add_disk(mdfd, st, sra, dsk) == 0) pr_err("%s re-added to %s\n", dsk->sys_name, chosen_name); @@ -573,7 +610,11 @@ int Incremental(char *devname, struct context *c, rv = 1; } } else { - if (c->verbose >= 0) + if (c->export) { + printf("MD_STARTED=unsafe\n"); + } else if (journal_device_missing) { + pr_err("Journal device is missing, not safe to start yet.\n"); + } else if (c->verbose >= 0) pr_err("%s attached to %s, not enough to start safely.\n", devname, chosen_name); rv = 0; @@ -586,8 +627,7 @@ out: close(mdfd); if (policy) dev_policy_free(policy); - if (sra) - sysfs_free(sra); + sysfs_free(sra); return rv; out_unlock: map_unlock(&map); @@ -602,14 +642,13 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra, * and events less than the passed events, and remove the device. */ struct mdinfo *d; - mdu_array_info_t ra; - if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0) + if (md_array_active(mdfd)) return; /* not safe to remove from active arrays * without thinking more */ for (d = sra->devs; d ; d = d->next) { - char dn[10]; + char dn[24]; // 2*11 bytes for ints (including sign) + colon + null byte int dfd; struct mdinfo info; sprintf(dn, "%d:%d", d->disk.major, d->disk.minor); @@ -624,8 +663,7 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra, st->ss->free_super(st); close(dfd); - if (info.disk.number != number || - info.events >= events) + if (info.disk.number != number || info.events >= events) continue; if (d->disk.raid_disk > -1) @@ -646,6 +684,7 @@ static int count_active(struct supertype *st, struct mdinfo *sra, int cnt = 0; int replcnt = 0; __u64 max_events = 0; + __u64 max_journal_events = 0; char *avail = NULL; int *best = NULL; char *devmap = NULL; @@ -673,8 +712,12 @@ static int count_active(struct supertype *st, struct mdinfo *sra, close(dfd); if (ok != 0) continue; + info.array.raid_disks = raid_disks; st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum); + if (info.disk.raid_disk == MD_DISK_ROLE_JOURNAL && + info.events > max_journal_events) + max_journal_events = info.events; if (!avail) { raid_disks = info.array.raid_disks; avail = xcalloc(raid_disks, 1); @@ -724,6 +767,9 @@ static int count_active(struct supertype *st, struct mdinfo *sra, replcnt++; st->ss->free_super(st); } + if (max_journal_events >= max_events - 1) + bestinfo->journal_clean = 1; + if (!avail) return 0; /* We need to reject any device that thinks the best device is @@ -746,35 +792,42 @@ static int count_active(struct supertype *st, struct mdinfo *sra, if (avail[i]) cnt++; } + /* Also need to reject any spare device with an event count that + * is too high + */ + for (d = sra->devs; d; d = d->next) { + if (!(d->disk.state & (1<events > max_events) + d->disk.state |= (1 << MD_DISK_REMOVED); + } free(best); free(devmap); return cnt + replcnt; } /* test if container has degraded member(s) */ -static int container_members_max_degradation(struct map_ent *map, struct map_ent *me) +static int +container_members_max_degradation(struct map_ent *map, struct map_ent *me) { - mdu_array_info_t array; - int afd; - int max_degraded = 0; + struct mdinfo *sra; + int degraded, max_degraded = 0; for(; map; map = map->next) { - if (!is_subarray(map->metadata) || - devname2devnum(map->metadata+1) != me->devnum) - continue; - afd = open_dev(map->devnum); - if (afd < 0) + if (!metadata_container_matches(map->metadata, me->devnm)) continue; /* most accurate information regarding array degradation */ - if (ioctl(afd, GET_ARRAY_INFO, &array) >= 0) { - int degraded = array.raid_disks - array.active_disks - - array.spare_disks; - if (degraded > max_degraded) - max_degraded = degraded; - } - close(afd); + sra = sysfs_read(-1, map->devnm, + GET_DISKS | GET_DEVS | GET_STATE); + if (!sra) + continue; + degraded = sra->array.raid_disks - sra->array.active_disks - + sra->array.spare_disks; + if (degraded > max_degraded) + max_degraded = degraded; + sysfs_free(sra); } - return (max_degraded); + + return max_degraded; } static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, @@ -792,12 +845,12 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, * Return 0 on success, or some exit code on failure, probably 1. */ int rv = 1; - struct stat stb; + dev_t rdev; struct map_ent *mp, *map = NULL; struct mdinfo *chosen = NULL; int dfd = *dfdp; - if (fstat(dfd, &stb) != 0) + if (!fstat_is_blkdev(dfd, devname, &rdev)) return 1; /* @@ -810,8 +863,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, */ if (map_lock(&map)) { - pr_err("failed to get exclusive lock on " - "mapfile\n"); + pr_err("failed to get exclusive lock on mapfile\n"); return 1; } for (mp = map ; mp ; mp = mp->next) { @@ -819,7 +871,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, struct domainlist *dl = NULL; struct mdinfo *sra; unsigned long long devsize; - unsigned long long component_size = 0; + struct spare_criteria sc = {0, 0}; if (is_subarray(mp->metadata)) continue; @@ -836,18 +888,12 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, } free(st2); } - sra = sysfs_read(-1, mp->devnum, + sra = sysfs_read(-1, mp->devnm, GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE| - GET_DEGRADED|GET_COMPONENT|GET_VERSION); - if (!sra) { - /* Probably a container - no degraded info */ - sra = sysfs_read(-1, mp->devnum, - GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE| - GET_COMPONENT|GET_VERSION); - if (sra) - sra->array.failed_disks = -1; - } - if (!sra) + GET_COMPONENT|GET_VERSION); + if (sra) + sra->array.failed_disks = -1; + else continue; if (st == NULL) { int i; @@ -857,8 +903,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, sra->text_version); if (!st2) { if (verbose > 1) - pr_err("not adding %s to %s" - " as metadata not recognised.\n", + pr_err("not adding %s to %s as metadata not recognised.\n", devname, mp->path); goto next; } @@ -882,14 +927,15 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, /* true for containers, here we must read superblock * to obtain minimum spare size */ struct supertype *st3 = dup_super(st2); - int mdfd = open_dev(mp->devnum); + int mdfd = open_dev(mp->devnm); if (mdfd < 0) { free(st3); goto next; } if (st3->ss->load_container && !st3->ss->load_container(st3, mdfd, mp->path)) { - component_size = st3->ss->min_acceptable_spare_size(st3); + if (st3->ss->get_spare_criteria) + st3->ss->get_spare_criteria(st3, &sc); st3->ss->free_super(st3); } free(st3); @@ -897,12 +943,10 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, } if ((sra->component_size > 0 && st2->ss->avail_size(st2, devsize, - sra->devs - ? sra->devs->data_offset - : INVALID_SECTORS) - < sra->component_size) - || - (sra->component_size == 0 && devsize < component_size)) { + sra->devs ? sra->devs->data_offset : + INVALID_SECTORS) < + sra->component_size) || + (sra->component_size == 0 && devsize < sc.min_size)) { if (verbose > 1) pr_err("not adding %s to %s as it is too small\n", devname, mp->path); @@ -922,8 +966,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, sizeof(target->uuid)) == 0 && sra->array.failed_disks > 0) { /* This is our target!! */ - if (chosen) - sysfs_free(chosen); + sysfs_free(chosen); chosen = sra; sra = NULL; /* skip to end so we don't check any more */ @@ -940,8 +983,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, if (domain_test(dl, pol, st2->ss->name) != 1) { /* domain test fails */ if (verbose > 1) - pr_err("not adding %s to %s as" - " it is not in a compatible domain\n", + pr_err("not adding %s to %s as it is not in a compatible domain\n", devname, mp->path); goto next; @@ -956,8 +998,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, sra = NULL; } next: - if (sra) - sysfs_free(sra); + sysfs_free(sra); if (st != st2) free(st2); if (dl) @@ -965,16 +1006,17 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, } if (chosen) { /* add current device to chosen array as a spare */ - int mdfd = open_dev(devname2devnum(chosen->sys_name)); + int mdfd = open_dev(chosen->sys_name); if (mdfd >= 0) { struct mddev_dev devlist; - char devname[20]; + char chosen_devname[24]; // 2*11 for int (including signs) + colon + null devlist.next = NULL; devlist.used = 0; - devlist.writemostly = 0; - devlist.devname = devname; - sprintf(devname, "%d:%d", major(stb.st_rdev), - minor(stb.st_rdev)); + devlist.writemostly = FlagDefault; + devlist.failfast = FlagDefault; + devlist.devname = chosen_devname; + sprintf(chosen_devname, "%d:%d", major(rdev), + minor(rdev)); devlist.disposition = 'a'; close(dfd); *dfdp = -1; @@ -1036,8 +1078,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol, char *devname = NULL; unsigned long long devsectors; - if (de->d_ino == 0 || - de->d_name[0] == '.' || + if (de->d_ino == 0 || de->d_name[0] == '.' || (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN)) goto next; @@ -1075,9 +1116,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol, st2 = dup_super(st); else st2 = guess_super_type(fd, guess_partitions); - if (st2 == NULL || - st2->ss->load_super(st2, fd, NULL) < 0) + if (st2 == NULL || st2->ss->load_super(st2, fd, NULL) < 0) goto next; + st2->ignore_hw_compat = 0; if (!st) { /* Check domain policy again, this time referring to metadata */ @@ -1103,8 +1144,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol, * metadata which makes better use of the device can * be found. */ - if (chosen == NULL || - chosen_size < info.component_size) { + if (chosen == NULL || chosen_size < info.component_size) { chosen_size = info.component_size; free(chosen); chosen = devname; @@ -1213,8 +1253,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol, !policy_action_allows(pol, st?st->ss->name:NULL, act_spare_same_slot)) { if (verbose > 1) - pr_err("%s is not bare, so not " - "considering as a spare\n", + pr_err("%s is not bare, so not considering as a spare\n", devname); return 1; } @@ -1257,7 +1296,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol, return rv; } -int IncrementalScan(int verbose) +int IncrementalScan(struct context *c, char *devnm) { /* look at every device listed in the 'map' file. * If one is found that is not running then: @@ -1269,27 +1308,66 @@ int IncrementalScan(int verbose) struct map_ent *me; struct mddev_ident *devs, *mddev; int rv = 0; + char container[32]; + char *only = NULL; map_read(&mapl); devs = conf_get_ident(NULL); +restart: for (me = mapl ; me ; me = me->next) { - mdu_array_info_t array; - mdu_bitmap_file_t bmf; struct mdinfo *sra; - int mdfd = open_dev(me->devnum); + int mdfd; + + if (devnm && strcmp(devnm, me->devnm) != 0) + continue; + if (me->metadata[0] == '/') { + char *sl; + + if (!devnm) + continue; + + /* member array, need to work on container */ + strncpy(container, me->metadata+1, 32); + container[31] = 0; + sl = strchr(container, '/'); + if (sl) + *sl = 0; + only = devnm; + devnm = container; + goto restart; + } + mdfd = open_dev(me->devnm); if (mdfd < 0) continue; - if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 || - errno != ENODEV) { + if (!isdigit(me->metadata[0])) { + /* must be a container */ + struct supertype *st = super_by_fd(mdfd, NULL); + int ret = 0; + struct map_ent *map = NULL; + + if (st && st->ss->load_container) + ret = st->ss->load_container(st, mdfd, NULL); + close(mdfd); + if (!ret && st && st->ss->container_content) { + if (map_lock(&map)) + pr_err("failed to get exclusive lock on mapfile\n"); + ret = Incremental_container(st, me->path, c, only); + map_unlock(&map); + } + if (ret) + rv = 1; + continue; + } + if (md_array_active(mdfd)) { close(mdfd); continue; } /* Ok, we can try this one. Maybe it needs a bitmap */ for (mddev = devs ; mddev ; mddev = mddev->next) - if (mddev->devname && me->path - && devname_matches(mddev->devname, me->path)) + if (mddev->devname && me->path && + devname_matches(mddev->devname, me->path)) break; if (mddev && mddev->bitmap_file) { /* @@ -1297,15 +1375,14 @@ int IncrementalScan(int verbose) * is a hint only */ int added = -1; - if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) { - int bmfd = open(mddev->bitmap_file, O_RDWR); - if (bmfd >= 0) { - added = ioctl(mdfd, SET_BITMAP_FILE, - bmfd); - close(bmfd); - } + int bmfd; + + bmfd = open(mddev->bitmap_file, O_RDWR); + if (bmfd >= 0) { + added = ioctl(mdfd, SET_BITMAP_FILE, bmfd); + close(bmfd); } - if (verbose >= 0) { + if (c->verbose >= 0) { if (added == 0) pr_err("Added bitmap %s to %s\n", mddev->bitmap_file, me->path); @@ -1317,16 +1394,16 @@ int IncrementalScan(int verbose) /* FIXME check for reshape_active and consider not * starting array. */ - sra = sysfs_read(mdfd, 0, 0); + sra = sysfs_read(mdfd, NULL, 0); if (sra) { if (sysfs_set_str(sra, NULL, "array_state", "read-auto") == 0) { - if (verbose >= 0) + if (c->verbose >= 0) pr_err("started array %s\n", - me->path ?: devnum2devname(me->devnum)); + me->path ?: me->devnm); } else { pr_err("failed to start array %s: %s\n", - me->path ?: devnum2devname(me->devnum), + me->path ?: me->devnm, strerror(errno)); rv = 1; } @@ -1343,7 +1420,7 @@ static char *container2devname(char *devname) if (devname[0] == '/') { int fd = open(devname, O_RDONLY); if (fd >= 0) { - mdname = devnum2devname(fd2devnum(fd)); + mdname = xstrdup(fd2devnm(fd)); close(fd); } } else { @@ -1354,7 +1431,7 @@ static char *container2devname(char *devname) return mdname; mp = map_by_uuid(&map, uuid); if (mp) - mdname = devnum2devname(mp->devnum); + mdname = xstrdup(mp->devnm); map_free(map); } @@ -1362,7 +1439,7 @@ static char *container2devname(char *devname) } static int Incremental_container(struct supertype *st, char *devname, - struct context *c) + struct context *c, char *only) { /* Collect the contents of this container and for each * array, choose a device name and assemble the array. @@ -1381,6 +1458,7 @@ static int Incremental_container(struct supertype *st, char *devname, int sfd; int ra_blocked = 0; int ra_all = 0; + int result = 0; st->ss->getinfo_super(st, &info, NULL); @@ -1388,7 +1466,9 @@ static int Incremental_container(struct supertype *st, char *devname, info.container_enough > 0) /* pass */; else { - if (c->verbose) + if (c->export) { + printf("MD_STARTED=no\n"); + } else if (c->verbose) pr_err("not enough devices to start the container\n"); return 0; } @@ -1409,8 +1489,12 @@ static int Incremental_container(struct supertype *st, char *devname, list = st->ss->container_content(st, NULL); /* when nothing to activate - quit */ - if (list == NULL) + if (list == NULL) { + if (c->export) { + printf("MD_STARTED=nothing\n"); + } return 0; + } for (ra = list ; ra ; ra = ra->next) { int mdfd; char chosen_name[1024]; @@ -1428,12 +1512,12 @@ static int Incremental_container(struct supertype *st, char *devname, mp = map_by_uuid(&map, ra->uuid); if (mp) { - mdfd = open_dev(mp->devnum); + mdfd = open_dev(mp->devnm); if (mp->path) strcpy(chosen_name, mp->path); else - strcpy(chosen_name, devnum2devname(mp->devnum)); - } else { + strcpy(chosen_name, mp->devnm); + } else if (!only) { /* Check in mdadm.conf for container == devname and * member == ra->text_version after second slash. @@ -1476,10 +1560,9 @@ static int Incremental_container(struct supertype *st, char *devname, if (match && match->devname && strcasecmp(match->devname, "") == 0) { if (c->verbose > 0) - pr_err("array %s/%s is " - "explicitly ignored by mdadm.conf\n", + pr_err("array %s/%s is explicitly ignored by mdadm.conf\n", match->container, match->member); - return 2; + continue; } if (match) trustworthy = LOCAL; @@ -1488,8 +1571,10 @@ static int Incremental_container(struct supertype *st, char *devname, ra->name, c->autof, trustworthy, - chosen_name); + chosen_name, 0); } + if (only && (!mp || strcmp(mp->devnm, only) != 0)) + continue; if (mdfd < 0) { pr_err("failed to open %s: %s.\n", @@ -1498,9 +1583,30 @@ static int Incremental_container(struct supertype *st, char *devname, } assemble_container_content(st, mdfd, ra, c, - chosen_name); + chosen_name, &result); close(mdfd); } + if (c->export && result) { + char sep = '='; + printf("MD_STARTED"); + if (result & INCR_NO) { + printf("%cno", sep); + sep = ','; + } + if (result & INCR_UNSAFE) { + printf("%cunsafe", sep); + sep = ','; + } + if (result & INCR_ALREADY) { + printf("%calready", sep); + sep = ','; + } + if (result & INCR_YES) { + printf("%cyes", sep); + sep = ','; + } + printf("\n"); + } /* don't move spares to container with volume being activated when all volumes are blocked */ @@ -1517,12 +1623,15 @@ static int Incremental_container(struct supertype *st, char *devname, struct supertype *sst = super_imsm.match_metadata_desc("imsm"); struct mdinfo *sinfo; - unsigned long long min_size = 0; - if (st->ss->min_acceptable_spare_size) - min_size = st->ss->min_acceptable_spare_size(st); + if (!sst->ss->load_container(sst, sfd, NULL)) { + struct spare_criteria sc = {0, 0}; + + if (st->ss->get_spare_criteria) + st->ss->get_spare_criteria(st, &sc); + close(sfd); - sinfo = container_choose_spares(sst, min_size, + sinfo = container_choose_spares(sst, &sc, domains, NULL, st->ss->name, 0); sst->ss->free_super(sst); @@ -1554,6 +1663,19 @@ static int Incremental_container(struct supertype *st, char *devname, return 0; } +static void run_udisks(char *arg1, char *arg2) +{ + int pid = fork(); + int status; + if (pid == 0) { + execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL); + execl("/bin/udisks", "udisks", arg1, arg2, NULL); + exit(1); + } + while (pid > 0 && wait(&status) != pid) + ; +} + /* * IncrementalRemove - Attempt to see if the passed in device belongs to any * raid arrays, and if so first fail (if needed) and then remove the device. @@ -1567,36 +1689,51 @@ static int Incremental_container(struct supertype *st, char *devname, int IncrementalRemove(char *devname, char *id_path, int verbose) { int mdfd; - int rv; + int rv = 0; struct mdstat_ent *ent; struct mddev_dev devlist; + struct mdinfo mdi; + char buf[32]; if (!id_path) - dprintf(Name ": incremental removal without --path " - "lacks the possibility to re-add new device in this " - "port\n"); + dprintf("incremental removal without --path lacks the possibility to re-add new device in this port\n"); if (strchr(devname, '/')) { - pr_err("incremental removal requires a " - "kernel device name, not a file: %s\n", devname); + pr_err("incremental removal requires a kernel device name, not a file: %s\n", devname); return 1; } ent = mdstat_by_component(devname); if (!ent) { - pr_err("%s does not appear to be a component " - "of any array\n", devname); + if (verbose >= 0) + pr_err("%s does not appear to be a component of any array\n", devname); return 1; } - mdfd = open_dev(ent->devnum); + if (sysfs_init(&mdi, -1, ent->devnm)) { + pr_err("unable to initialize sysfs for: %s\n", devname); + return 1; + } + mdfd = open_dev_excl(ent->devnm); + if (mdfd > 0) { + close(mdfd); + if (sysfs_get_str(&mdi, NULL, "array_state", + buf, sizeof(buf)) > 0) { + if (strncmp(buf, "active", 6) == 0 || + strncmp(buf, "clean", 5) == 0) + sysfs_set_str(&mdi, NULL, + "array_state", "read-auto"); + } + } + mdfd = open_dev(ent->devnm); if (mdfd < 0) { - pr_err("Cannot open array %s!!\n", ent->dev); + if (verbose >= 0) + pr_err("Cannot open array %s!!\n", ent->devnm); free_mdstat(ent); return 1; } if (id_path) { struct map_ent *map = NULL, *me; - me = map_by_devnum(&map, ent->devnum); + me = map_by_devnm(&map, ent->devnm); if (me) policy_save_path(id_path, me); map_free(map); @@ -1611,20 +1748,39 @@ int IncrementalRemove(char *devname, char *id_path, int verbose) struct mdstat_ent *mdstat = mdstat_read(0, 0); struct mdstat_ent *memb; for (memb = mdstat ; memb ; memb = memb->next) - if (is_container_member(memb, ent->dev)) { - int subfd = open_dev(memb->devnum); + if (is_container_member(memb, ent->devnm)) { + int subfd = open_dev(memb->devnm); if (subfd >= 0) { - Manage_subdevs(memb->dev, subfd, - &devlist, verbose, 0, - NULL, 0); + rv |= Manage_subdevs( + memb->devnm, subfd, + &devlist, verbose, 0, + NULL, 0); close(subfd); } } free_mdstat(mdstat); } else - Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0); - devlist.disposition = 'r'; - rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0); + rv |= Manage_subdevs(ent->devnm, mdfd, &devlist, + verbose, 0, NULL, 0); + if (rv & 2) { + /* Failed due to EBUSY, try to stop the array. + * Give udisks a chance to unmount it first. + */ + int devid = devnm2devid(ent->devnm); + run_udisks("--unmount", map_dev(major(devid),minor(devid), 0)); + rv = Manage_stop(ent->devnm, mdfd, verbose, 1); + if (rv) + /* At least we can try to trigger a 'remove' */ + sysfs_uevent(&mdi, "remove"); + if (verbose) { + if (rv) + pr_err("Fail to stop %s too.\n", ent->devnm); + } + } else { + devlist.disposition = 'r'; + rv = Manage_subdevs(ent->devnm, mdfd, &devlist, + verbose, 0, NULL, 0); + } close(mdfd); free_mdstat(ent); return rv;