X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=sysfs.c;h=8fdb52998409303e3874f84b6fe5a1eea8820aee;hp=8bcdaa59cbac61f093025f4aea876bf2c7815844;hb=d23534e4646313a67296b295666d165a87bb2c92;hpb=2eb91c81d8da5cfdc36c93d8daf405c95e8bac2f diff --git a/sysfs.c b/sysfs.c index 8bcdaa59..8fdb5299 100644 --- a/sysfs.c +++ b/sysfs.c @@ -2,7 +2,7 @@ * sysfs - extract md related information from sysfs. Part of: * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006 Neil Brown + * Copyright (C) 2006-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -81,50 +81,23 @@ int sysfs_open(int devnum, char *devname, char *attr) void sysfs_init(struct mdinfo *mdi, int fd, int devnum) { + mdi->sys_name[0] = 0; if (fd >= 0) { - struct stat stb; mdu_version_t vers; - if (fstat(fd, &stb)) - return; if (ioctl(fd, RAID_VERSION, &vers) != 0) return; - if (major(stb.st_rdev) == MD_MAJOR) - sprintf(mdi->sys_name, "md%d", (int)minor(stb.st_rdev)); - else if (major(stb.st_rdev) == get_mdp_major()) - sprintf(mdi->sys_name, "md_d%d", - (int)minor(stb.st_rdev)>>MdpMinorShift); - else { - /* must be an extended-minor partition. Look at the - * /sys/dev/block/%d:%d link which must look like - * ../../block/mdXXX/mdXXXpYY - */ - char path[30]; - char link[200]; - char *cp; - int n; - sprintf(path, "/sys/dev/block/%d:%d", major(stb.st_rdev), - minor(stb.st_rdev)); - n = readlink(path, link, sizeof(link)-1); - if (n <= 0) - return; - link[n] = 0; - cp = strrchr(link, '/'); - if (cp) *cp = 0; - cp = strchr(link, '/'); - if (cp && strncmp(cp, "/md", 3) == 0) - strcpy(mdi->sys_name, cp+1); - else - return; - } - } else { - if (devnum >= 0) - sprintf(mdi->sys_name, "md%d", devnum); - else - sprintf(mdi->sys_name, "md_d%d", - -1-devnum); + devnum = fd2devnum(fd); } + if (devnum == NoMdDev) + return; + if (devnum >= 0) + sprintf(mdi->sys_name, "md%d", devnum); + else + sprintf(mdi->sys_name, "md_d%d", + -1-devnum); } + struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) { /* Longest possible name in sysfs, mounted at /sys, is @@ -146,6 +119,10 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) return sra; memset(sra, 0, sizeof(*sra)); sysfs_init(sra, fd, devnum); + if (sra->sys_name[0] == 0) { + free(sra); + return NULL; + } sprintf(fname, "/sys/block/%s/md/", sra->sys_name); base = fname + strlen(fname); @@ -295,18 +272,34 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) } } - dev->next = sra->devs; - sra->devs = dev; - strcpy(dev->sys_name, de->d_name); dev->disk.raid_disk = strtoul(buf, &ep, 10); if (*ep) dev->disk.raid_disk = -1; strcpy(dbase, "block/dev"); - if (load_sys(fname, buf)) - goto abort; + if (load_sys(fname, buf)) { + free(dev); + if (options & SKIP_GONE_DEVS) + continue; + else + goto abort; + } sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); + /* special case check for block devices that can go 'offline' */ + if (options & SKIP_GONE_DEVS) { + strcpy(dbase, "block/device/state"); + if (load_sys(fname, buf) == 0 && + strncmp(buf, "offline", 7) == 0) { + free(dev); + continue; + } + } + + /* finally add this disk to the array */ + dev->next = sra->devs; + sra->devs = dev; + if (options & GET_OFFSET) { strcpy(dbase, "offset"); if (load_sys(fname, buf)) @@ -433,21 +426,44 @@ int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev, return sysfs_set_str(sra, dev, name, valstr); } -int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, - char *name, unsigned long long *val) +int sysfs_uevent(struct mdinfo *sra, char *event) { char fname[50]; - char buf[50]; int n; int fd; - char *ep; + + sprintf(fname, "/sys/block/%s/uevent", + sra->sys_name); + fd = open(fname, O_WRONLY); + if (fd < 0) + return -1; + n = write(fd, event, strlen(event)); + close(fd); + return 0; +} + +int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev, + char *name) +{ + char fname[50]; + int fd; + sprintf(fname, "/sys/block/%s/md/%s/%s", sra->sys_name, dev?dev->sys_name:"", name); - fd = open(fname, O_RDONLY); + fd = open(fname, O_RDWR); if (fd < 0) - return -1; + fd = open(fname, O_RDONLY); + return fd; +} + +int sysfs_fd_get_ll(int fd, unsigned long long *val) +{ + char buf[50]; + int n; + char *ep; + + lseek(fd, 0, 0); n = read(fd, buf, sizeof(buf)); - close(fd); if (n <= 0) return -1; buf[n] = 0; @@ -457,6 +473,46 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, return 0; } +int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, + char *name, unsigned long long *val) +{ + int n; + int fd; + + fd = sysfs_get_fd(sra, dev, name); + if (fd < 0) + return -1; + n = sysfs_fd_get_ll(fd, val); + close(fd); + return n; +} + +int sysfs_fd_get_str(int fd, char *val, int size) +{ + int n; + + lseek(fd, 0, 0); + n = read(fd, val, size); + if (n <= 0) + return -1; + val[n] = 0; + return n; +} + +int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev, + char *name, char *val, int size) +{ + int n; + int fd; + + fd = sysfs_get_fd(sra, dev, name); + if (fd < 0) + return -1; + n = sysfs_fd_get_str(fd, val, size); + close(fd); + return n; +} + int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms) { unsigned long sec; @@ -497,6 +553,20 @@ int sysfs_set_array(struct mdinfo *info, int vers) rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size); rv |= sysfs_set_num(info, NULL, "layout", info->array.layout); rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2); + if (info->custom_array_size) { + int rc; + + rc = sysfs_set_num(info, NULL, "array_size", + info->custom_array_size/2); + if (rc && errno == ENOENT) { + fprintf(stderr, Name ": This kernel does not " + "have the md/array_size attribute, " + "the array may be larger than expected\n"); + rc = 0; + } + rv |= rc; + } + if (info->array.level > 0) rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start); return rv; @@ -528,8 +598,12 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd) rv = sysfs_set_num(sra, sd, "offset", sd->data_offset); rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2); if (sra->array.level != LEVEL_CONTAINER) { + if (sd->recovery_start == MaxSector) + /* This can correctly fail if array isn't started, + * yet, so just ignore status for now. + */ + sysfs_set_str(sra, sd, "state", "insync"); rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk); -// rv |= sysfs_set_str(sra, sd, "state", "in_sync"); } return rv; } @@ -712,3 +786,109 @@ int sysfs_unique_holder(int devnum, long rdev) else return found; } + +#ifndef MDASSEMBLE + +static char *clean_states[] = { + "clear", "inactive", "readonly", "read-auto", "clean", NULL }; + +int WaitClean(char *dev, int sock, int verbose) +{ + int fd; + struct mdinfo *mdi; + int rv = 1; + int devnum; + + fd = open(dev, O_RDONLY); + if (fd < 0) { + if (verbose) + fprintf(stderr, Name ": Couldn't open %s: %s\n", dev, strerror(errno)); + return 1; + } + + devnum = fd2devnum(fd); + mdi = sysfs_read(fd, devnum, GET_VERSION|GET_LEVEL|GET_SAFEMODE); + if (!mdi) { + if (verbose) + fprintf(stderr, Name ": Failed to read sysfs attributes for " + "%s\n", dev); + close(fd); + return 0; + } + + switch(mdi->array.level) { + case LEVEL_LINEAR: + case LEVEL_MULTIPATH: + case 0: + /* safemode delay is irrelevant for these levels */ + rv = 0; + + } + + /* for internal metadata the kernel handles the final clean + * transition, containers can never be dirty + */ + if (!is_subarray(mdi->text_version)) + rv = 0; + + /* safemode disabled ? */ + if (mdi->safe_mode_delay == 0) + rv = 0; + + if (rv) { + int state_fd = sysfs_open(fd2devnum(fd), NULL, "array_state"); + char buf[20]; + fd_set fds; + struct timeval tm; + + /* minimize the safe_mode_delay and prepare to wait up to 5s + * for writes to quiesce + */ + sysfs_set_safemode(mdi, 1); + tm.tv_sec = 5; + tm.tv_usec = 0; + + /* give mdmon a chance to checkpoint resync */ + sysfs_set_str(mdi, NULL, "sync_action", "idle"); + + FD_ZERO(&fds); + + /* wait for array_state to be clean */ + while (1) { + rv = read(state_fd, buf, sizeof(buf)); + if (rv < 0) + break; + if (sysfs_match_word(buf, clean_states) <= 4) + break; + FD_SET(state_fd, &fds); + rv = select(state_fd + 1, NULL, NULL, &fds, &tm); + if (rv < 0 && errno != EINTR) + break; + lseek(state_fd, 0, SEEK_SET); + } + if (rv < 0) + rv = 1; + else if (fping_monitor(sock) == 0 || + ping_monitor(mdi->text_version) == 0) { + /* we need to ping to close the window between array + * state transitioning to clean and the metadata being + * marked clean + */ + rv = 0; + } else + rv = 1; + if (rv && verbose) + fprintf(stderr, Name ": Error waiting for %s to be clean\n", + dev); + + /* restore the original safe_mode_delay */ + sysfs_set_safemode(mdi, mdi->safe_mode_delay); + close(state_fd); + } + + sysfs_free(mdi); + close(fd); + + return rv; +} +#endif /* MDASSEMBLE */