X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=util.c;h=a238a2107b968443ad530506340cfed7e21f8ef3;hp=cb97816c346f5d4b5e88068d423090d00c10a2bc;hb=fa219dd26aa498f3e446798b7b341e8e4a29d378;hpb=f0ec67106c00f8dd1cadebfdff933fd8aefa0ff2 diff --git a/util.c b/util.c index cb97816c..a238a210 100644 --- a/util.c +++ b/util.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2012 Neil Brown + * Copyright (C) 2001-2013 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -28,11 +28,15 @@ #include #include #include +#include +#include +#include +#include #include #include #include +#include -int __offroot; /* * following taken from linux/blkpg.h because they aren't @@ -43,10 +47,10 @@ int __offroot; /* The argument structure */ struct blkpg_ioctl_arg { - int op; - int flags; - int datalen; - void *data; + int op; + int flags; + int datalen; + void *data; }; /* The subfunctions (for the op field) */ @@ -78,6 +82,135 @@ struct blkpg_partition { aren't permitted). */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +static int is_dlm_hooks_ready = 0; + +int dlm_funs_ready(void) +{ + return is_dlm_hooks_ready ? 1 : 0; +} + +#ifndef MDASSEMBLE +static struct dlm_hooks *dlm_hooks = NULL; +struct dlm_lock_resource *dlm_lock_res = NULL; +static int ast_called = 0; + +struct dlm_lock_resource { + dlm_lshandle_t *ls; + struct dlm_lksb lksb; +}; + +/* Using poll(2) to wait for and dispatch ASTs */ +static int poll_for_ast(dlm_lshandle_t ls) +{ + struct pollfd pfd; + + pfd.fd = dlm_hooks->ls_get_fd(ls); + pfd.events = POLLIN; + + while (!ast_called) + { + if (poll(&pfd, 1, 0) < 0) + { + perror("poll"); + return -1; + } + dlm_hooks->dispatch(dlm_hooks->ls_get_fd(ls)); + } + ast_called = 0; + + return 0; +} + +static void dlm_ast(void *arg) +{ + ast_called = 1; +} + +static char *cluster_name = NULL; +/* Create the lockspace, take bitmapXXX locks on all the bitmaps. */ +int cluster_get_dlmlock(int *lockid) +{ + int ret = -1; + char str[64]; + int flags = LKF_NOQUEUE; + + ret = get_cluster_name(&cluster_name); + if (ret) { + pr_err("The md can't get cluster name\n"); + return -1; + } + + dlm_lock_res = xmalloc(sizeof(struct dlm_lock_resource)); + dlm_lock_res->ls = dlm_hooks->create_lockspace(cluster_name, O_RDWR); + if (!dlm_lock_res->ls) { + pr_err("%s failed to create lockspace\n", cluster_name); + return -ENOMEM; + } + + snprintf(str, 64, "bitmap%s", cluster_name); + ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE, &dlm_lock_res->lksb, + flags, str, strlen(str), 0, dlm_ast, + dlm_lock_res, NULL, NULL); + if (ret) { + pr_err("error %d when get PW mode on lock %s\n", errno, str); + dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1); + return ret; + } + + /* Wait for it to complete */ + poll_for_ast(dlm_lock_res->ls); + *lockid = dlm_lock_res->lksb.sb_lkid; + + return dlm_lock_res->lksb.sb_status; +} + +int cluster_release_dlmlock(int lockid) +{ + int ret = -1; + + if (!cluster_name) + return -1; + + ret = dlm_hooks->ls_unlock(dlm_lock_res->ls, lockid, 0, + &dlm_lock_res->lksb, dlm_lock_res); + if (ret) { + pr_err("error %d happened when unlock\n", errno); + /* XXX make sure the lock is unlocked eventually */ + goto out; + } + + /* Wait for it to complete */ + poll_for_ast(dlm_lock_res->ls); + + errno = dlm_lock_res->lksb.sb_status; + if (errno != EUNLOCK) { + pr_err("error %d happened in ast when unlock lockspace\n", errno); + /* XXX make sure the lockspace is unlocked eventually */ + goto out; + } + + ret = dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1); + if (ret) { + pr_err("error %d happened when release lockspace\n", errno); + /* XXX make sure the lockspace is released eventually */ + goto out; + } + free(dlm_lock_res); + +out: + return ret; +} +#else +int cluster_get_dlmlock(int *lockid) +{ + return -1; +} +int cluster_release_dlmlock(int lockid) +{ + return -1; +} +#endif + /* * Parse a 128 bit uuid in 4 integers * format is 32 hexx nibbles with options :. separator @@ -127,21 +260,21 @@ int parse_uuid(char *str, int uuid[4]) int md_get_version(int fd) { - struct stat stb; - mdu_version_t vers; + struct stat stb; + mdu_version_t vers; - if (fstat(fd, &stb)<0) - return -1; - if ((S_IFMT&stb.st_mode) != S_IFBLK) - return -1; + if (fstat(fd, &stb)<0) + return -1; + if ((S_IFMT&stb.st_mode) != S_IFBLK) + return -1; - if (ioctl(fd, RAID_VERSION, &vers) == 0) - return (vers.major*10000) + (vers.minor*100) + vers.patchlevel; - if (errno == EACCES) - return -1; - if (major(stb.st_rdev) == MD_MAJOR) - return (3600); - return -1; + if (ioctl(fd, RAID_VERSION, &vers) == 0) + return (vers.major*10000) + (vers.minor*100) + vers.patchlevel; + if (errno == EACCES) + return -1; + if (major(stb.st_rdev) == MD_MAJOR) + return (3600); + return -1; } int get_linux_version() @@ -194,7 +327,7 @@ unsigned long long parse_size(char *size) * followed by 'K', 'M', or 'G'. * Without a suffix, K is assumed. * Number returned is in sectors (half-K) - * 0 returned on error. + * INVALID_SECTORS returned on error. */ char *c; long long s = strtoll(size, &c, 10); @@ -213,11 +346,14 @@ unsigned long long parse_size(char *size) c++; s *= 1024 * 1024 * 2; break; + case 's': /* sectors */ + c++; + break; } } else - s = 0; + s = INVALID_SECTORS; if (*c) - s = 0; + s = INVALID_SECTORS; return s; } @@ -267,6 +403,16 @@ long parse_num(char *num) } #endif +int parse_cluster_confirm_arg(char *input, char **devname, int *slot) +{ + char *dev; + *slot = strtoul(input, &dev, 10); + if (dev == input || dev[0] != ':') + return -1; + *devname = dev+1; + return 0; +} + void remove_partitions(int fd) { /* remove partitions from this block devices. @@ -303,7 +449,7 @@ int test_partition(int fd) if (ioctl(fd, BLKPG, &a) == 0) /* Very unlikely, but not a partition */ return 0; - if (errno == ENXIO) + if (errno == ENXIO || errno == ENOTTY) /* not a partition */ return 0; @@ -344,14 +490,15 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail) /* there must be one of the 'copies' form 'first' */ int n = copies; int cnt = 0; + int this = first; while (n--) { - if (avail[first]) + if (avail[this]) cnt++; - first = (first+1) % raid_disks; + this = (this+1) % raid_disks; } if (cnt == 0) return 0; - + first = (first+(layout&255)) % raid_disks; } while (first != 0); return 1; @@ -363,6 +510,13 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail) case 1: return avail_disks >= 1; case 4: + if (avail_disks == raid_disks - 1 && + !avail[raid_disks - 1]) + /* If just the parity device is missing, then we + * have enough, even if not clean + */ + return 1; + /* FALL THROUGH */ case 5: if (clean) return avail_disks >= raid_disks-1; @@ -382,7 +536,6 @@ int enough_fd(int fd) { struct mdu_array_info_s array; struct mdu_disk_info_s disk; - int avail_disks = 0; int i, rv; char *avail; @@ -402,7 +555,6 @@ int enough_fd(int fd) continue; if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks) continue; - avail_disks++; avail[disk.raid_disk] = 1; } /* This is used on an active array, so assume it is clean */ @@ -506,7 +658,8 @@ int check_ext2(int fd, char *name) */ unsigned char sb[1024]; time_t mtime; - int size, bsize; + unsigned long long size; + int bsize; if (lseek(fd, 1024,0)!= 1024) return 0; if (read(fd, sb, 1024)!= 1024) @@ -517,10 +670,10 @@ int check_ext2(int fd, char *name) mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8; bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8; size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8; + size <<= bsize; pr_err("%s appears to contain an ext2fs file system\n", name); - fprintf(stderr," size=%dK mtime=%s", - size*(1<ignore_hw_compat = 1; - st->ss->load_super(st, fd, name); - /* Looks like a raid array .. */ - pr_err("%s appears to be part of a raid array:\n", - name); - st->ss->getinfo_super(st, &info, NULL); - st->ss->free_super(st); - crtime = info.array.ctime; - level = map_num(pers, info.array.level); - if (!level) level = "-unknown-"; - fprintf(stderr, " level=%s devices=%d ctime=%s", - level, info.array.raid_disks, ctime(&crtime)); + if (!st) + return 0; + if (st->ss->add_to_super != NULL) { + st->ss->load_super(st, fd, name); + /* Looks like a raid array .. */ + pr_err("%s appears to be part of a raid array:\n", name); + st->ss->getinfo_super(st, &info, NULL); + st->ss->free_super(st); + crtime = info.array.ctime; + level = map_num(pers, info.array.level); + if (!level) + level = "-unknown-"; + cont_err("level=%s devices=%d ctime=%s", + level, info.array.raid_disks, ctime(&crtime)); + } else { + /* Looks like GPT or MBR */ + pr_err("partition table exists on %s\n", name); + } return 1; } @@ -667,13 +825,13 @@ char *human_size(long long bytes) if (bytes < 5000*1024) buf[0] = 0; else if (bytes < 2*1024LL*1024LL*1024LL) { - long cMiB = (bytes / ( (1LL<<20) / 200LL ) +1) /2; + long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2; long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2; snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)", cMiB/100 , cMiB % 100, cMB/100, cMB % 100); } else { - long cGiB = (bytes / ( (1LL<<30) / 200LL ) +1) /2; + long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2; long cGB = (bytes / (1000000000LL/200LL ) +1) /2; snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)", cGiB/100 , cGiB % 100, @@ -702,11 +860,11 @@ char *human_size_brief(long long bytes, int prefix) buf[0] = 0; else if (prefix == IEC) { if (bytes < 2*1024LL*1024LL*1024LL) { - long cMiB = (bytes / ( (1LL<<20) / 200LL ) +1) /2; + long cMiB = (bytes * 200LL / (1LL<<20) +1) /2; snprintf(buf, sizeof(buf), "%ld.%02ldMiB", cMiB/100 , cMiB % 100); } else { - long cGiB = (bytes / ( (1LL<<30) / 200LL ) +1) /2; + long cGiB = (bytes * 200LL / (1LL<<30) +1) /2; snprintf(buf, sizeof(buf), "%ld.%02ldGiB", cGiB/100 , cGiB % 100); } @@ -775,43 +933,79 @@ int get_data_disks(int level, int layout, int raid_disks) return data_disks; } +dev_t devnm2devid(char *devnm) +{ + /* First look in /sys/block/$DEVNM/dev for %d:%d + * If that fails, try parsing out a number + */ + char path[100]; + char *ep; + int fd; + int mjr,mnr; + + sprintf(path, "/sys/block/%s/dev", devnm); + fd = open(path, O_RDONLY); + if (fd >= 0) { + char buf[20]; + int n = read(fd, buf, sizeof(buf)); + close(fd); + if (n > 0) + buf[n] = 0; + if (n > 0 && sscanf(buf, "%d:%d\n", &mjr, &mnr) == 2) + return makedev(mjr, mnr); + } + if (strncmp(devnm, "md_d", 4) == 0 && + isdigit(devnm[4]) && + (mnr = strtoul(devnm+4, &ep, 10)) >= 0 && + ep > devnm && *ep == 0) + return makedev(get_mdp_major(), mnr << MdpMinorShift); + + if (strncmp(devnm, "md", 2) == 0 && + isdigit(devnm[2]) && + (mnr = strtoul(devnm+2, &ep, 10)) >= 0 && + ep > devnm && *ep == 0) + return makedev(MD_MAJOR, mnr); + + return 0; +} + #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) -char *get_md_name(int dev) +char *get_md_name(char *devnm) { /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */ /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */ + static char devname[50]; struct stat stb; - dev_t rdev; + dev_t rdev = devnm2devid(devnm); char *dn; - if (dev < 0) { - int mdp = get_mdp_major(); - if (mdp < 0) return NULL; - rdev = makedev(mdp, (-1-dev)<<6); - snprintf(devname, sizeof(devname), "/dev/md/d%d", -1-dev); - if (stat(devname, &stb) == 0 - && (S_IFMT&stb.st_mode) == S_IFBLK - && (stb.st_rdev == rdev)) - return devname; - } else { - rdev = makedev(MD_MAJOR, dev); - snprintf(devname, sizeof(devname), "/dev/md%d", dev); - if (stat(devname, &stb) == 0 - && (S_IFMT&stb.st_mode) == S_IFBLK - && (stb.st_rdev == rdev)) - return devname; - - snprintf(devname, sizeof(devname), "/dev/md/%d", dev); + if (rdev == 0) + return 0; + if (strncmp(devnm, "md_", 3) == 0) { + snprintf(devname, sizeof(devname), "/dev/md/%s", + devnm + 3); if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK && (stb.st_rdev == rdev)) return devname; } + snprintf(devname, sizeof(devname), "/dev/%s", devnm); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + + snprintf(devname, sizeof(devname), "/dev/md/%s", devnm+2); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + dn = map_dev(major(rdev), minor(rdev), 0); if (dn) return dn; - snprintf(devname, sizeof(devname), "/dev/.tmp.md%d", dev); + snprintf(devname, sizeof(devname), "/dev/.tmp.%s", devnm); if (mknod(devname, S_IFBLK | 0600, rdev) == -1) if (errno != EEXIST) return NULL; @@ -829,65 +1023,40 @@ void put_md_name(char *name) if (strncmp(name, "/dev/.tmp.md", 12) == 0) unlink(name); } +#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */ -int find_free_devnum(int use_partitions) +int get_maj_min(char *dev, int *major, int *minor) { - int devnum; - for (devnum = 127; devnum != 128; - devnum = devnum ? devnum-1 : (1<<20)-1) { - char *dn; - int _devnum; - char nbuf[50]; - - _devnum = use_partitions ? (-1-devnum) : devnum; - if (mddev_busy(_devnum)) - continue; - sprintf(nbuf, "%s%d", use_partitions?"mdp":"md", devnum); - if (!conf_name_is_free(nbuf)) - continue; - /* make sure it is new to /dev too, at least as a - * non-standard */ - dn = map_dev(dev2major(_devnum), dev2minor(_devnum), 0); - if (dn && ! is_standard(dn, NULL)) - continue; - break; - } - if (devnum == 128) - return NoMdDev; - return use_partitions ? (-1-devnum) : devnum; + char *e; + *major = strtoul(dev, &e, 0); + return (e > dev && *e == ':' && e[1] && + (*minor = strtoul(e+1, &e, 0)) >= 0 && + *e == 0); } -#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */ int dev_open(char *dev, int flags) { /* like 'open', but if 'dev' matches %d:%d, create a temp * block device and open that */ - char *e; int fd = -1; char devname[32]; int major; int minor; - if (!dev) return -1; + if (!dev) + return -1; flags |= O_DIRECT; - major = strtoul(dev, &e, 0); - if (e > dev && *e == ':' && e[1] && - (minor = strtoul(e+1, &e, 0)) >= 0 && - *e == 0) { - char *path = map_dev(major, minor, 0); - if (path) - fd = open(path, flags); - if (fd < 0) { - snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d", - (int)getpid(), major, minor); - if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) { - fd = open(devname, flags); - unlink(devname); - } + if (get_maj_min(dev, &major, &minor)) { + snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d", + (int)getpid(), major, minor); + if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) { + fd = open(devname, flags); + unlink(devname); } if (fd < 0) { + /* Try /tmp as /dev appear to be read-only */ snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d", (int)getpid(), major, minor); if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) { @@ -900,26 +1069,30 @@ int dev_open(char *dev, int flags) return fd; } -int open_dev_flags(int devnum, int flags) +int open_dev_flags(char *devnm, int flags) { + dev_t devid; char buf[20]; - sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum)); + devid = devnm2devid(devnm); + sprintf(buf, "%d:%d", major(devid), minor(devid)); return dev_open(buf, flags); } -int open_dev(int devnum) +int open_dev(char *devnm) { - return open_dev_flags(devnum, O_RDONLY); + return open_dev_flags(devnm, O_RDONLY); } -int open_dev_excl(int devnum) +int open_dev_excl(char *devnm) { char buf[20]; int i; int flags = O_RDWR; + dev_t devid = devnm2devid(devnm); + long delay = 1000; - sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum)); + sprintf(buf, "%d:%d", major(devid), minor(devid)); for (i = 0 ; i < 25 ; i++) { int fd = dev_open(buf, flags|O_EXCL); if (fd >= 0) @@ -930,7 +1103,9 @@ int open_dev_excl(int devnum) } if (errno != EBUSY) return fd; - usleep(200000); + usleep(delay); + if (delay < 200000) + delay *= 2; } return -1; } @@ -953,6 +1128,7 @@ void wait_for(char *dev, int fd) { int i; struct stat stb_want; + long delay = 1000; if (fstat(fd, &stb_want) != 0 || (stb_want.st_mode & S_IFMT) != S_IFBLK) @@ -964,10 +1140,12 @@ void wait_for(char *dev, int fd) (stb.st_mode & S_IFMT) == S_IFBLK && (stb.st_rdev == stb_want.st_rdev)) return; - usleep(200000); + usleep(delay); + if (delay < 200000) + delay *= 2; } if (i == 25) - dprintf("%s: timeout waiting for %s\n", __func__, dev); + dprintf("timeout waiting for %s\n", dev); } struct superswitch *superlist[] = @@ -990,9 +1168,9 @@ struct supertype *super_by_fd(int fd, char **subarrayp) char version[20]; int i; char *subarray = NULL; - int container = NoMdDev; + char container[32] = ""; - sra = sysfs_read(fd, 0, GET_VERSION); + sra = sysfs_read(fd, NULL, GET_VERSION); if (sra) { vers = sra->array.major_version; @@ -1018,9 +1196,8 @@ struct supertype *super_by_fd(int fd, char **subarrayp) *subarray++ = '\0'; subarray = xstrdup(subarray); } - container = devname2devnum(dev); - if (sra) - sysfs_free(sra); + strcpy(container, dev); + sysfs_free(sra); sra = sysfs_read(-1, container, GET_VERSION); if (sra && sra->text_version[0]) verstr = sra->text_version; @@ -1031,14 +1208,13 @@ struct supertype *super_by_fd(int fd, char **subarrayp) for (i = 0; st == NULL && superlist[i] ; i++) st = superlist[i]->match_metadata_desc(verstr); - if (sra) - sysfs_free(sra); + sysfs_free(sra); if (st) { st->sb = NULL; if (subarrayp) *subarrayp = subarray; - st->container_dev = container; - st->devnum = fd2devnum(fd); + strcpy(st->container_devnm, container); + strcpy(st->devnm, fd2devnm(fd)); } else free(subarray); @@ -1073,6 +1249,8 @@ struct supertype *dup_super(struct supertype *orig) st->ss = orig->ss; st->max_devs = orig->max_devs; st->minor_version = orig->minor_version; + st->ignore_hw_compat = orig->ignore_hw_compat; + st->data_offset = orig->data_offset; st->sb = NULL; st->info = NULL; return st; @@ -1085,12 +1263,12 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type) */ struct superswitch *ss; struct supertype *st; - time_t besttime = 0; + unsigned int besttime = 0; int bestsuper = -1; int i; st = xcalloc(1, sizeof(*st)); - st->container_dev = NoMdDev; + st->container_devnm[0] = 0; for (i = 0 ; superlist[i]; i++) { int rv; @@ -1120,7 +1298,6 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type) rv = superlist[bestsuper]->load_super(st, fd, NULL); if (rv == 0) { superlist[bestsuper]->free_super(st); - st->ignore_hw_compat = 0; return st; } } @@ -1308,18 +1485,6 @@ int check_partitions(int fd, char *dname, unsigned long long freesize, return 0; } -void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk) -{ - int d; - - ioctl(mdfd, GET_ARRAY_INFO, ainf); - for (d = 0 ; d < MAX_DISKS ; d++) { - if (ioctl(mdfd, GET_DISK_INFO, disk) == 0 && - (disk->major || disk->minor)) - return; - } -} - int open_container(int fd) { /* 'fd' is a block device. Find out if it is in use @@ -1348,6 +1513,20 @@ int open_container(int fd) continue; if (de->d_name[0] == '.') continue; + /* Need to make sure it is a container and not a volume */ + sprintf(e, "/%s/md/metadata_version", de->d_name); + dfd = open(path, O_RDONLY); + if (dfd < 0) + continue; + n = read(dfd, buf, sizeof(buf)); + close(dfd); + if (n <= 0 || (unsigned)n >= sizeof(buf)) + continue; + buf[n] = 0; + if (strncmp(buf, "external", 8) != 0 || + n < 10 || + buf[9] == '/') + continue; sprintf(e, "/%s/dev", de->d_name); dfd = open(path, O_RDONLY); if (dfd < 0) @@ -1384,13 +1563,47 @@ struct superswitch *version_to_superswitch(char *vers) return NULL; } +int metadata_container_matches(char *metadata, char *devnm) +{ + /* Check if 'devnm' is the container named in 'metadata' + * which is + * /containername/componentname or + * -containername/componentname + */ + int l; + if (*metadata != '/' && *metadata != '-') + return 0; + l = strlen(devnm); + if (strncmp(metadata+1, devnm, l) != 0) + return 0; + if (metadata[l+1] != '/') + return 0; + return 1; +} + +int metadata_subdev_matches(char *metadata, char *devnm) +{ + /* Check if 'devnm' is the subdev named in 'metadata' + * which is + * /containername/subdev or + * -containername/subdev + */ + char *sl; + if (*metadata != '/' && *metadata != '-') + return 0; + sl = strchr(metadata+1, '/'); + if (!sl) + return 0; + if (strcmp(sl+1, devnm) == 0) + return 1; + return 0; +} + int is_container_member(struct mdstat_ent *mdstat, char *container) { if (mdstat->metadata_version == NULL || strncmp(mdstat->metadata_version, "external:", 9) != 0 || - !is_subarray(mdstat->metadata_version+9) || - strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || - mdstat->metadata_version[10+strlen(container)] != '/') + !metadata_container_matches(mdstat->metadata_version+9, container)) return 0; return 1; @@ -1423,6 +1636,7 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) struct mdinfo *mdi; struct mdinfo *info; int fd, err = 1; + char *_devnm; fd = open(dev, O_RDWR|O_EXCL); if (fd < 0) { @@ -1432,15 +1646,16 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) return -1; } - st->devnum = fd2devnum(fd); - if (st->devnum == NoMdDev) { + _devnm = fd2devnm(fd); + if (_devnm == NULL) { if (!quiet) pr_err("Failed to determine device number for %s\n", dev); goto close_fd; } + strcpy(st->devnm, _devnm); - mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL); + mdi = sysfs_read(fd, st->devnm, GET_VERSION|GET_LEVEL); if (!mdi) { if (!quiet) pr_err("Failed to read sysfs for %s\n", @@ -1462,8 +1677,7 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) goto free_sysfs; } - st->devname = devnum2devname(st->devnum); - if (!st->devname) { + if (st->devnm[0] == 0) { if (!quiet) pr_err("Failed to allocate device name\n"); goto free_sysfs; @@ -1472,14 +1686,14 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) if (!st->ss->load_container) { if (!quiet) pr_err("%s is not a container\n", dev); - goto free_name; + goto free_sysfs; } if (st->ss->load_container(st, fd, NULL)) { if (!quiet) pr_err("Failed to load metadata for %s\n", dev); - goto free_name; + goto free_sysfs; } info = st->ss->container_content(st, subarray); @@ -1496,9 +1710,6 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) free_super: if (err) st->ss->free_super(st); - free_name: - if (err) - free(st->devname); free_sysfs: sysfs_free(mdi); close_fd: @@ -1596,16 +1807,14 @@ unsigned long long min_recovery_start(struct mdinfo *array) return recovery_start; } -int mdmon_pid(int devnum) +int mdmon_pid(char *devnm) { char path[100]; char pid[10]; int fd; int n; - char *devname = devnum2devname(devnum); - sprintf(path, "%s/%s.pid", MDMON_DIR, devname); - free(devname); + sprintf(path, "%s/%s.pid", MDMON_DIR, devnm); fd = open(path, O_RDONLY | O_NOATIME, 0); @@ -1618,9 +1827,9 @@ int mdmon_pid(int devnum) return atoi(pid); } -int mdmon_running(int devnum) +int mdmon_running(char *devnm) { - int pid = mdmon_pid(devnum); + int pid = mdmon_pid(devnm); if (pid <= 0) return 0; if (kill(pid, 0) == 0) @@ -1628,7 +1837,7 @@ int mdmon_running(int devnum) return 0; } -int start_mdmon(int devnum) +int start_mdmon(char *devnm) { int i, skipped; int len; @@ -1637,8 +1846,8 @@ int start_mdmon(int devnum) char pathbuf[1024]; char *paths[4] = { pathbuf, - "/sbin/mdmon", - "mdmon", + BINDIR "/mdmon", + "./mdmon", NULL }; @@ -1658,6 +1867,41 @@ int start_mdmon(int devnum) } else pathbuf[0] = '\0'; + /* First try to run systemctl */ + if (!check_env("MDADM_NO_SYSTEMCTL")) + switch(fork()) { + case 0: + /* FIXME yuk. CLOSE_EXEC?? */ + skipped = 0; + for (i = 3; skipped < 20; i++) + if (close(i) < 0) + skipped++; + else + skipped = 0; + + /* Don't want to see error messages from + * systemctl. If the service doesn't exist, + * we start mdmon ourselves. + */ + close(2); + open("/dev/null", O_WRONLY); + snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service", + devnm); + status = execl("/usr/bin/systemctl", "systemctl", + "start", + pathbuf, NULL); + status = execl("/bin/systemctl", "systemctl", "start", + pathbuf, NULL); + exit(1); + case -1: pr_err("cannot run mdmon. Array remains readonly\n"); + return -1; + default: /* parent - good */ + pid = wait(&status); + if (pid >= 0 && status == 0) + return 0; + } + + /* That failed, try running mdmon directly */ switch(fork()) { case 0: /* FIXME yuk. CLOSE_EXEC?? */ @@ -1670,38 +1914,22 @@ int start_mdmon(int devnum) for (i = 0; paths[i]; i++) if (paths[i][0]) { - if (__offroot) { - execl(paths[i], "mdmon", "--offroot", - devnum2devname(devnum), - NULL); - } else { - execl(paths[i], "mdmon", - devnum2devname(devnum), - NULL); - } + execl(paths[i], paths[i], + devnm, NULL); } exit(1); - case -1: pr_err("cannot run mdmon. " - "Array remains readonly\n"); + case -1: pr_err("cannot run mdmon. Array remains readonly\n"); return -1; default: /* parent - good */ pid = wait(&status); - if (pid < 0 || status != 0) + if (pid < 0 || status != 0) { + pr_err("failed to launch mdmon. Array remains readonly\n"); return -1; + } } return 0; } -int check_env(char *name) -{ - char *val = getenv(name); - - if (val && atoi(val) == 1) - return 1; - - return 0; -} - __u32 random32(void) { __u32 rv; @@ -1713,6 +1941,27 @@ __u32 random32(void) return rv; } +void random_uuid(__u8 *buf) +{ + int fd, i, len; + __u32 r[4]; + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + goto use_random; + len = read(fd, buf, 16); + close(fd); + if (len != 16) + goto use_random; + + return; + +use_random: + for (i = 0; i < 4; i++) + r[i] = random(); + memcpy(buf, r, 16); +} + #ifndef MDASSEMBLE int flush_metadata_updates(struct supertype *st) { @@ -1722,7 +1971,7 @@ int flush_metadata_updates(struct supertype *st) return -1; } - sfd = connect_monitor(devnum2devname(st->container_dev)); + sfd = connect_monitor(st->container_devnm); if (sfd < 0) return -1; @@ -1767,8 +2016,7 @@ int experimental(void) if (check_env("MDADM_EXPERIMENTAL")) return 1; else { - pr_err("To use this feature MDADM_EXPERIMENTAL" - " environment variable has to be defined.\n"); + pr_err("To use this feature MDADM_EXPERIMENTAL environment variable has to be defined.\n"); return 0; } } @@ -1809,7 +2057,7 @@ struct mdinfo *container_choose_spares(struct supertype *st, found = 1; /* check if domain matches */ if (found && domlist) { - struct dev_policy *pol = devnum_policy(dev); + struct dev_policy *pol = devid_policy(dev); if (spare_group) pol_add(&pol, pol_domain, spare_group, NULL); @@ -1833,3 +2081,142 @@ struct mdinfo *container_choose_spares(struct supertype *st, } return disks; } + +/* Checks if paths point to the same device + * Returns 0 if they do. + * Returns 1 if they don't. + * Returns -1 if something went wrong, + * e.g. paths are empty or the files + * they point to don't exist */ +int compare_paths (char* path1, char* path2) +{ + struct stat st1,st2; + + if (path1 == NULL || path2 == NULL) + return -1; + if (stat(path1,&st1) != 0) + return -1; + if (stat(path2,&st2) != 0) + return -1; + if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) + return 0; + return 1; +} + +/* Make sure we can open as many devices as needed */ +void enable_fds(int devices) +{ + unsigned int fds = 20 + devices; + struct rlimit lim; + if (getrlimit(RLIMIT_NOFILE, &lim) != 0 + || lim.rlim_cur >= fds) + return; + if (lim.rlim_max < fds) + lim.rlim_max = fds; + lim.rlim_cur = fds; + setrlimit(RLIMIT_NOFILE, &lim); +} + +int in_initrd(void) +{ + /* This is based on similar function in systemd. */ + struct statfs s; + /* statfs.f_type is signed long on s390x and MIPS, causing all + sorts of sign extension problems with RAMFS_MAGIC being + defined as 0x858458f6 */ + return statfs("/", &s) >= 0 && + ((unsigned long)s.f_type == TMPFS_MAGIC || + ((unsigned long)s.f_type & 0xFFFFFFFFUL) == + ((unsigned long)RAMFS_MAGIC & 0xFFFFFFFFUL)); +} + +void reopen_mddev(int mdfd) +{ + /* Re-open without any O_EXCL, but keep + * the same fd + */ + char *devnm; + int fd; + devnm = fd2devnm(mdfd); + close(mdfd); + fd = open_dev(devnm); + if (fd >= 0 && fd != mdfd) + dup2(fd, mdfd); +} + +#ifndef MDASSEMBLE +static struct cmap_hooks *cmap_hooks = NULL; +static int is_cmap_hooks_ready = 0; + +void set_cmap_hooks(void) +{ + cmap_hooks = xmalloc(sizeof(struct cmap_hooks)); + cmap_hooks->cmap_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL); + if (!cmap_hooks->cmap_handle) + return; + + cmap_hooks->initialize = dlsym(cmap_hooks->cmap_handle, "cmap_initialize"); + cmap_hooks->get_string = dlsym(cmap_hooks->cmap_handle, "cmap_get_string"); + cmap_hooks->finalize = dlsym(cmap_hooks->cmap_handle, "cmap_finalize"); + + if (!cmap_hooks->initialize || !cmap_hooks->get_string || + !cmap_hooks->finalize) + dlclose(cmap_hooks->cmap_handle); + else + is_cmap_hooks_ready = 1; +} + +int get_cluster_name(char **cluster_name) +{ + int rv = -1; + cmap_handle_t handle; + + if (!is_cmap_hooks_ready) + return rv; + + rv = cmap_hooks->initialize(&handle); + if (rv != CS_OK) + goto out; + + rv = cmap_hooks->get_string(handle, "totem.cluster_name", cluster_name); + if (rv != CS_OK) { + free(*cluster_name); + rv = -1; + goto name_err; + } + + rv = 0; +name_err: + cmap_hooks->finalize(handle); +out: + return rv; +} + +void set_dlm_hooks(void) +{ + dlm_hooks = xmalloc(sizeof(struct dlm_hooks)); + dlm_hooks->dlm_handle = dlopen("libdlm_lt.so.3", RTLD_NOW | RTLD_LOCAL); + if (!dlm_hooks->dlm_handle) + return; + + dlm_hooks->create_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_create_lockspace"); + dlm_hooks->release_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_release_lockspace"); + dlm_hooks->ls_lock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_lock"); + dlm_hooks->ls_unlock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_unlock"); + dlm_hooks->ls_get_fd = dlsym(dlm_hooks->dlm_handle, "dlm_ls_get_fd"); + dlm_hooks->dispatch = dlsym(dlm_hooks->dlm_handle, "dlm_dispatch"); + + if (!dlm_hooks->create_lockspace || !dlm_hooks->ls_lock || + !dlm_hooks->ls_unlock || !dlm_hooks->release_lockspace || + !dlm_hooks->ls_get_fd || !dlm_hooks->dispatch) + dlclose(dlm_hooks->dlm_handle); + else + is_dlm_hooks_ready = 1; +} + +void set_hooks(void) +{ + set_dlm_hooks(); + set_cmap_hooks(); +} +#endif