X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=sysfs.c;h=ceab27f5eb1eb22f544bfc504e093eb2f08f6d43;hp=725c90c51751c1dedce98d3398d698183d9cdbab;hb=d56dd607ba433d9334f0fb4114fe081742ae4361;hpb=1538aca5cbbd99be47657e0ca0b7e2186426a1b1 diff --git a/sysfs.c b/sysfs.c index 725c90c5..ceab27f5 100644 --- a/sysfs.c +++ b/sysfs.c @@ -57,16 +57,12 @@ void sysfs_free(struct mdinfo *sra) } } -int sysfs_open(int devnum, char *devname, char *attr) +int sysfs_open(char *devnm, char *devname, char *attr) { char fname[50]; int fd; - char *mdname = devnum2devname(devnum); - if (!mdname) - return -1; - - sprintf(fname, "/sys/block/%s/md/", mdname); + sprintf(fname, "/sys/block/%s/md/", devnm); if (devname) { strcat(fname, devname); strcat(fname, "/"); @@ -75,45 +71,36 @@ int sysfs_open(int devnum, char *devname, char *attr) fd = open(fname, O_RDWR); if (fd < 0 && errno == EACCES) fd = open(fname, O_RDONLY); - free(mdname); return fd; } -void sysfs_init(struct mdinfo *mdi, int fd, int devnum) +void sysfs_init(struct mdinfo *mdi, int fd, char *devnm) { mdi->sys_name[0] = 0; if (fd >= 0) { mdu_version_t vers; if (ioctl(fd, RAID_VERSION, &vers) != 0) return; - devnum = fd2devnum(fd); + devnm = fd2devnm(fd); } - if (devnum == NoMdDev) + if (devnm == NULL) return; - if (devnum >= 0) - sprintf(mdi->sys_name, "md%d", devnum); - else - sprintf(mdi->sys_name, "md_d%d", - -1-devnum); + strcpy(mdi->sys_name, devnm); } - -struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) +struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options) { char fname[PATH_MAX]; char buf[PATH_MAX]; char *base; char *dbase; struct mdinfo *sra; - struct mdinfo *dev; + struct mdinfo *dev, **devp; DIR *dir = NULL; struct dirent *de; - sra = malloc(sizeof(*sra)); - if (sra == NULL) - return sra; - memset(sra, 0, sizeof(*sra)); - sysfs_init(sra, fd, devnum); + sra = xcalloc(1, sizeof(*sra)); + sysfs_init(sra, fd, devnm); if (sra->sys_name[0] == 0) { free(sra); return NULL; @@ -183,8 +170,10 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) if (options & GET_CACHE) { strcpy(base, "stripe_cache_size"); if (load_sys(fname, buf)) - goto abort; - sra->cache_size = strtoul(buf, NULL, 0); + /* Probably level doesn't support it */ + sra->cache_size = 0; + else + sra->cache_size = strtoul(buf, NULL, 0); } if (options & GET_MISMATCH) { strcpy(base, "mismatch_cnt"); @@ -195,7 +184,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) if (options & GET_SAFEMODE) { int scale = 1; int dot = 0; - int i; + unsigned i; unsigned long msec; size_t len; @@ -221,6 +210,19 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) msec = (msec * 1000) / scale; sra->safe_mode_delay = msec; } + if (options & GET_BITMAP_LOCATION) { + strcpy(base, "bitmap/location"); + if (load_sys(fname, buf)) + goto abort; + if (strncmp(buf, "file", 4) == 0) + sra->bitmap_offset = 1; + else if (strncmp(buf, "none", 4) == 0) + sra->bitmap_offset = 0; + else if (buf[0] == '+') + sra->bitmap_offset = strtol(buf+1, NULL, 10); + else + goto abort; + } if (! (options & GET_DEVS)) return sra; @@ -232,6 +234,8 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) goto abort; sra->array.spare_disks = 0; + devp = &sra->devs; + sra->devs = NULL; while ((de = readdir(dir)) != NULL) { char *ep; if (de->d_ino == 0 || @@ -241,9 +245,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) dbase = base + strlen(base); *dbase++ = '/'; - dev = malloc(sizeof(*dev)); - if (!dev) - goto abort; + dev = xmalloc(sizeof(*dev)); /* Always get slot, major, minor */ strcpy(dbase, "slot"); @@ -265,7 +267,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) free(dev); goto abort; } - + } strcpy(dev->sys_name, de->d_name); dev->disk.raid_disk = strtoul(buf, &ep, 10); @@ -279,6 +281,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) free(dev); continue; } + sra->array.nr_disks++; sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); /* special case check for block devices that can go 'offline' */ @@ -290,14 +293,20 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) } /* finally add this disk to the array */ - dev->next = sra->devs; - sra->devs = dev; + *devp = dev; + devp = & dev->next; + dev->next = NULL; if (options & GET_OFFSET) { strcpy(dbase, "offset"); if (load_sys(fname, buf)) goto abort; dev->data_offset = strtoull(buf, NULL, 0); + strcpy(dbase, "new_offset"); + if (load_sys(fname, buf) == 0) + dev->new_data_offset = strtoull(buf, NULL, 0); + else + dev->new_data_offset = dev->data_offset; } if (options & GET_SIZE) { strcpy(dbase, "size"); @@ -372,7 +381,7 @@ unsigned long long get_component_size(int fd) char fname[50]; int n; if (fstat(fd, &stb)) return 0; - if (major(stb.st_rdev) != get_mdp_major()) + if (major(stb.st_rdev) != (unsigned)get_mdp_major()) sprintf(fname, "/sys/block/md%d/md/component_size", (int)minor(stb.st_rdev)); else @@ -383,7 +392,7 @@ unsigned long long get_component_size(int fd) return 0; n = read(fd, fname, sizeof(fname)); close(fd); - if (n == sizeof(fname)) + if (n < 0 || n == sizeof(fname)) return 0; fname[n] = 0; return strtoull(fname, NULL, 10) * 2; @@ -393,7 +402,7 @@ int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev, char *name, char *val) { char fname[50]; - int n; + unsigned int n; int fd; sprintf(fname, "/sys/block/%s/md/%s/%s", @@ -404,8 +413,8 @@ int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev, n = write(fd, val, strlen(val)); close(fd); if (n != strlen(val)) { - dprintf(Name ": failed to write '%s' to '%s' (%s)\n", - val, fname, strerror(errno)); + dprintf("%s: failed to write '%s' to '%s' (%s)\n", + Name, val, fname, strerror(errno)); return -1; } return 0; @@ -419,6 +428,14 @@ int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev, return sysfs_set_str(sra, dev, name, valstr); } +int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev, + char *name, long long val) +{ + char valstr[50]; + sprintf(valstr, "%lli", val); + return sysfs_set_str(sra, dev, name, valstr); +} + int sysfs_uevent(struct mdinfo *sra, char *event) { char fname[50]; @@ -432,8 +449,24 @@ int sysfs_uevent(struct mdinfo *sra, char *event) return -1; n = write(fd, event, strlen(event)); close(fd); + if (n != (int)strlen(event)) { + dprintf("%s: failed to write '%s' to '%s' (%s)\n", + Name, event, fname, strerror(errno)); + return -1; + } return 0; -} +} + +int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name) +{ + char fname[50]; + struct stat st; + + sprintf(fname, "/sys/block/%s/md/%s/%s", + sra->sys_name, dev?dev->sys_name:"", name); + + return stat(fname, &st) == 0; +} int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev, char *name) @@ -458,7 +491,7 @@ int sysfs_fd_get_ll(int fd, unsigned long long *val) lseek(fd, 0, 0); n = read(fd, buf, sizeof(buf)); if (n <= 0) - return -1; + return -2; buf[n] = 0; *val = strtoull(buf, &ep, 0); if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) @@ -480,6 +513,49 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev, return n; } +int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2) +{ + /* two numbers in this sysfs file, either + * NNN (NNN) + * or + * NNN / NNN + */ + char buf[80]; + int n; + char *ep, *ep2; + + lseek(fd, 0, 0); + n = read(fd, buf, sizeof(buf)); + if (n <= 0) + return -2; + buf[n] = 0; + *v1 = strtoull(buf, &ep, 0); + if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) + return -1; + while (*ep == ' ' || *ep == '/' || *ep == '(') + ep++; + *v2 = strtoull(ep, &ep2, 0); + if (ep2 == ep || (*ep2 != 0 && *ep2 != '\n' && *ep2 != ' ' && *ep2 != ')')) { + *v2 = *v1; + return 1; + } + return 2; +} + +int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev, + char *name, unsigned long long *v1, unsigned long long *v2) +{ + int n; + int fd; + + fd = sysfs_get_fd(sra, dev, name); + if (fd < 0) + return -1; + n = sysfs_fd_get_two(fd, v1, v2); + close(fd); + return n; +} + int sysfs_fd_get_str(int fd, char *val, int size) { int n; @@ -524,16 +600,30 @@ int sysfs_set_array(struct mdinfo *info, int vers) { int rv = 0; char ver[100]; + int raid_disks = info->array.raid_disks; ver[0] = 0; if (info->array.major_version == -1 && info->array.minor_version == -2) { + char buf[1024]; + strcat(strcpy(ver, "external:"), info->text_version); + /* meta version might already be set if we are setting + * new geometry for a reshape. In that case we don't + * want to over-write the 'readonly' flag that is + * stored in the metadata version. So read the current + * version first, and preserve the flag + */ + if (sysfs_get_str(info, NULL, "metadata_version", + buf, 1024) > 0) + if (strlen(buf) >= 9 && buf[9] == '-') + ver[9] = '-'; + if ((vers % 100) < 2 || sysfs_set_str(info, NULL, "metadata_version", ver) < 0) { - fprintf(stderr, Name ": This kernel does not " + pr_err("This kernel does not " "support external metadata.\n"); return 1; } @@ -542,7 +632,9 @@ int sysfs_set_array(struct mdinfo *info, int vers) return 0; /* FIXME */ rv |= sysfs_set_str(info, NULL, "level", map_num(pers, info->array.level)); - rv |= sysfs_set_num(info, NULL, "raid_disks", info->array.raid_disks); + if (info->reshape_active && info->delta_disks != UnSet) + raid_disks -= info->delta_disks; + rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks); rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size); rv |= sysfs_set_num(info, NULL, "layout", info->array.layout); rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2); @@ -552,7 +644,7 @@ int sysfs_set_array(struct mdinfo *info, int vers) rc = sysfs_set_num(info, NULL, "array_size", info->custom_array_size/2); if (rc && errno == ENOENT) { - fprintf(stderr, Name ": This kernel does not " + pr_err("This kernel does not " "have the md/array_size attribute, " "the array may be larger than expected\n"); rc = 0; @@ -562,6 +654,18 @@ int sysfs_set_array(struct mdinfo *info, int vers) if (info->array.level > 0) rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start); + + if (info->reshape_active) { + rv |= sysfs_set_num(info, NULL, "reshape_position", + info->reshape_progress); + rv |= sysfs_set_num(info, NULL, "chunk_size", info->new_chunk); + rv |= sysfs_set_num(info, NULL, "layout", info->new_layout); + rv |= sysfs_set_num(info, NULL, "raid_disks", + info->array.raid_disks); + /* We don't set 'new_level' here. That can only happen + * once the reshape completes. + */ + } return rv; } @@ -578,13 +682,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume) return rv; memset(nm, 0, sizeof(nm)); - sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor); - rv = readlink(dv, nm, sizeof(nm)); - if (rv <= 0) - return -1; - nm[rv] = '\0'; - dname = strrchr(nm, '/'); - if (dname) dname++; + dname = devid2kname(makedev(sd->disk.major, sd->disk.minor)); strcpy(sd->sys_name, "dev-"); strcpy(sd->sys_name+4, dname); @@ -603,7 +701,8 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume) * yet, so just ignore status for now. */ sysfs_set_str(sra, sd, "state", "insync"); - rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk); + if (sd->disk.raid_disk >= 0) + rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk); if (resume) sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start); } @@ -681,77 +780,66 @@ int sysfs_disk_to_scsi_id(int fd, __u32 *id) /* from an open block device, try to retrieve it scsi_id */ struct stat st; char path[256]; - char *c1, *c2; DIR *dir; struct dirent *de; + int host, bus, target, lun; if (fstat(fd, &st)) return 1; - snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device", + snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device/scsi_device", major(st.st_rdev), minor(st.st_rdev)); dir = opendir(path); if (!dir) return 1; - de = readdir(dir); - while (de) { - if (strncmp("scsi_disk:", de->d_name, - strlen("scsi_disk:")) == 0) + for (de = readdir(dir); de; de = readdir(dir)) { + int count; + + if (de->d_type != DT_DIR) + continue; + + count = sscanf(de->d_name, "%d:%d:%d:%d", &host, &bus, &target, &lun); + if (count == 4) break; - de = readdir(dir); } closedir(dir); if (!de) return 1; - c1 = strchr(de->d_name, ':'); - c1++; - c2 = strchr(c1, ':'); - *c2 = '\0'; - *id = strtol(c1, NULL, 10) << 24; /* host */ - c1 = c2 + 1; - c2 = strchr(c1, ':'); - *c2 = '\0'; - *id |= strtol(c1, NULL, 10) << 16; /* channel */ - c1 = c2 + 1; - c2 = strchr(c1, ':'); - *c2 = '\0'; - *id |= strtol(c1, NULL, 10) << 8; /* lun */ - c1 = c2 + 1; - *id |= strtol(c1, NULL, 10); /* id */ - + *id = (host << 24) | (bus << 16) | (target << 8) | (lun << 0); return 0; } - -int sysfs_unique_holder(int devnum, long rdev) +int sysfs_unique_holder(char *devnm, long rdev) { - /* Check that devnum is a holder of rdev, + /* Check that devnm is a holder of rdev, * and is the only holder. * we should be locked against races by - * an O_EXCL on devnum + * an O_EXCL on devnm + * Return values: + * 0 - not unique, not even a holder + * 1 - unique, this is the only holder. + * 2/3 - not unique, there is another holder + * -1 - error, cannot find the holders */ DIR *dir; struct dirent *de; char dirname[100]; char l; - int found = 0; + int ret = 0; sprintf(dirname, "/sys/dev/block/%d:%d/holders", major(rdev), minor(rdev)); dir = opendir(dirname); - errno = ENOENT; if (!dir) - return 0; + return -1; l = strlen(dirname); while ((de = readdir(dir)) != NULL) { - char buf[10]; + char buf[100]; + char *sl; int n; - int mj, mn; - char c; - int fd; if (de->d_ino == 0) continue; @@ -759,135 +847,75 @@ int sysfs_unique_holder(int devnum, long rdev) continue; strcpy(dirname+l, "/"); strcat(dirname+l, de->d_name); - strcat(dirname+l, "/dev"); - fd = open(dirname, O_RDONLY); - if (fd < 0) { - errno = ENOENT; - break; - } - n = read(fd, buf, sizeof(buf)-1); - close(fd); + n = readlink(dirname, buf, sizeof(buf)-1); + if (n <= 0) + continue; buf[n] = 0; - if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 || - c != '\n') { - errno = ENOENT; - break; - } - if (mj != MD_MAJOR) - mn = -1-(mn>>6); + sl = strrchr(buf, '/'); + if (!sl) + continue; + sl++; - if (devnum != mn) { - errno = EEXIST; - break; - } - found = 1; + if (strcmp(devnm, sl) == 0) + ret |= 1; + else + ret |= 2; } closedir(dir); - if (de) - return 0; - else - return found; + return ret; } -#ifndef MDASSEMBLE - -static char *clean_states[] = { - "clear", "inactive", "readonly", "read-auto", "clean", NULL }; - -int WaitClean(char *dev, int sock, int verbose) +int sysfs_freeze_array(struct mdinfo *sra) { - int fd; - struct mdinfo *mdi; - int rv = 1; - int devnum; - - fd = open(dev, O_RDONLY); - if (fd < 0) { - if (verbose) - fprintf(stderr, Name ": Couldn't open %s: %s\n", dev, strerror(errno)); - return 1; - } + /* Try to freeze resync/rebuild on this array/container. + * Return -1 if the array is busy, + * return 0 if this kernel doesn't support 'frozen' + * return 1 if it worked. + */ + char buf[20]; - devnum = fd2devnum(fd); - mdi = sysfs_read(fd, devnum, GET_VERSION|GET_LEVEL|GET_SAFEMODE); - if (!mdi) { - if (verbose) - fprintf(stderr, Name ": Failed to read sysfs attributes for " - "%s\n", dev); - close(fd); + if (!sysfs_attribute_available(sra, NULL, "sync_action")) + return 1; /* no sync_action == frozen */ + if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0) return 0; - } - - switch(mdi->array.level) { - case LEVEL_LINEAR: - case LEVEL_MULTIPATH: - case 0: - /* safemode delay is irrelevant for these levels */ - rv = 0; - - } + if (strcmp(buf, "frozen\n") == 0) + /* Already frozen */ + return 0; + if (strcmp(buf, "idle\n") != 0 && strcmp(buf, "recover\n") != 0) + return -1; + if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0) + return 0; + return 1; +} - /* for internal metadata the kernel handles the final clean - * transition, containers can never be dirty +int sysfs_wait(int fd, int *msec) +{ + /* Wait up to '*msec' for fd to have an exception condition. + * if msec == NULL, wait indefinitely. */ - if (!is_subarray(mdi->text_version)) - rv = 0; - - /* safemode disabled ? */ - if (mdi->safe_mode_delay == 0) - rv = 0; - - if (rv) { - int state_fd = sysfs_open(fd2devnum(fd), NULL, "array_state"); - char buf[20]; - fd_set fds; - struct timeval tm; - - /* minimize the safe_mode_delay and prepare to wait up to 5s - * for writes to quiesce - */ - sysfs_set_safemode(mdi, 1); - tm.tv_sec = 5; - tm.tv_usec = 0; - - FD_ZERO(&fds); - - /* wait for array_state to be clean */ - while (1) { - rv = read(state_fd, buf, sizeof(buf)); - if (rv < 0) - break; - if (sysfs_match_word(buf, clean_states) <= 4) - break; - FD_SET(state_fd, &fds); - rv = select(state_fd + 1, NULL, NULL, &fds, &tm); - if (rv < 0 && errno != EINTR) - break; - lseek(state_fd, 0, SEEK_SET); + fd_set fds; + int n; + FD_ZERO(&fds); + FD_SET(fd, &fds); + if (msec == NULL) + n = select(fd+1, NULL, NULL, &fds, NULL); + else if (*msec < 0) + n = 0; + else { + struct timeval start, end, tv; + gettimeofday(&start, NULL); + if (*msec < 1000) { + tv.tv_sec = 0; + tv.tv_usec = (*msec)*1000; + } else { + tv.tv_sec = (*msec)/1000; + tv.tv_usec = 0; } - if (rv < 0) - rv = 1; - else if (fping_monitor(sock) == 0 || - ping_monitor(mdi->text_version) == 0) { - /* we need to ping to close the window between array - * state transitioning to clean and the metadata being - * marked clean - */ - rv = 0; - } else - rv = 1; - if (rv && verbose) - fprintf(stderr, Name ": Error waiting for %s to be clean\n", - dev); - - /* restore the original safe_mode_delay */ - sysfs_set_safemode(mdi, mdi->safe_mode_delay); - close(state_fd); + n = select(fd+1, NULL, NULL, &fds, &tv); + gettimeofday(&end, NULL); + end.tv_sec -= start.tv_sec; + *msec -= (end.tv_sec * 1000 + end.tv_usec/1000 + - start.tv_usec/1000) + 1; } - - sysfs_free(mdi); - close(fd); - - return rv; + return n; } -#endif /* MDASSEMBLE */