+
+/*
+ * Open the sysfs attribute 'name' through sysfs_get_fd() (using 'sra'
+ * and 'dev' to locate it) and let sysfs_fd_get_ll() parse it into '*val'.
+ *
+ * Returns -1 when the attribute cannot be opened, otherwise the result
+ * of sysfs_fd_get_ll().  The descriptor is always closed again.
+ */
+int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
+		 char *name, unsigned long long *val)
+{
+	int status;
+	int attr_fd = sysfs_get_fd(sra, dev, name);
+
+	if (attr_fd < 0)
+		return -1;
+
+	status = sysfs_fd_get_ll(attr_fd, val);
+	close(attr_fd);
+	return status;
+}
+
+/*
+ * Read the content of 'fd', starting at offset 0, into 'val' and
+ * NUL-terminate it.
+ *
+ * 'size' is the full capacity of 'val', terminator included.
+ *
+ * Returns the number of bytes read (always > 0 and < size), or -1 when
+ * nothing could be read, when 'size' is not positive, or when the data
+ * fills the whole buffer so that no room is left for the terminator.
+ */
+int sysfs_fd_get_str(int fd, char *val, int size)
+{
+	int n;
+
+	if (size <= 0)
+		return -1;
+
+	lseek(fd, 0, SEEK_SET);
+	n = read(fd, val, size);
+	/* n == size would put the terminator one byte past the buffer */
+	if (n <= 0 || n >= size)
+		return -1;
+	val[n] = 0;
+	return n;
+}
+
+/*
+ * Open the sysfs attribute 'name' through sysfs_get_fd() (using 'sra'
+ * and 'dev' to locate it) and read it as a string into 'val', which has
+ * room for 'size' bytes, by way of sysfs_fd_get_str().
+ *
+ * Returns -1 when the attribute cannot be opened, otherwise the result
+ * of sysfs_fd_get_str().  The descriptor is always closed again.
+ */
+int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+		  char *name, char *val, int size)
+{
+	int status;
+	int attr_fd = sysfs_get_fd(sra, dev, name);
+
+	if (attr_fd < 0)
+		return -1;
+
+	status = sysfs_fd_get_str(attr_fd, val, size);
+	close(attr_fd);
+	return status;
+}
+
+/*
+ * Write 'ms' milliseconds to the array's "safe_mode_delay" attribute,
+ * formatted as "<seconds>.<milliseconds>" with a trailing newline.
+ *
+ * Returns whatever sysfs_set_str() returns.
+ */
+int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
+{
+	unsigned long sec = ms / 1000;
+	unsigned long msec = ms % 1000;
+	char delay[30];
+
+	/* both values are unsigned long, so they must be printed with %lu */
+	snprintf(delay, sizeof(delay), "%lu.%03lu\n", sec, msec);
+	/* this '\n' is needed for kernels older than 2.6.28 */
+	return sysfs_set_str(sra, NULL, "safe_mode_delay", delay);
+}
+
+/*
+ * Push the array description held in 'info' into the array's sysfs
+ * attributes.
+ *
+ * When the metadata version is -1.-2 the "metadata_version" attribute is
+ * written first as "external:<text_version>"; that requires
+ * (vers % 100) >= 2, otherwise an error is printed and 1 is returned.
+ * Arrays with a negative level get nothing else written.
+ *
+ * Returns 0 when everything succeeded, otherwise the OR of the
+ * individual sysfs_set_*() results (or 1 for the external-metadata
+ * failure described above).
+ */
+int sysfs_set_array(struct mdinfo *info, int vers)
+{
+	char ver[100];
+	int rv = 0;
+	int external = (info->array.major_version == -1 &&
+			info->array.minor_version == -2);
+
+	ver[0] = 0;
+	if (external) {
+		strcpy(ver, "external:");
+		strcat(ver, info->text_version);
+
+		if ((vers % 100) < 2 ||
+		    sysfs_set_str(info, NULL, "metadata_version", ver) < 0) {
+			fprintf(stderr, Name ": This kernel does not "
+				"support external metadata.\n");
+			return 1;
+		}
+	}
+
+	if (info->array.level < 0)
+		return 0; /* FIXME */
+
+	rv |= sysfs_set_str(info, NULL, "level",
+			    map_num(pers, info->array.level));
+	rv |= sysfs_set_num(info, NULL, "raid_disks", info->array.raid_disks);
+	rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size);
+	rv |= sysfs_set_num(info, NULL, "layout", info->array.layout);
+	rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
+
+	if (info->custom_array_size) {
+		int rc = sysfs_set_num(info, NULL, "array_size",
+				       info->custom_array_size/2);
+
+		if (rc && errno == ENOENT) {
+			/* a missing attribute is only worth a warning */
+			fprintf(stderr, Name ": This kernel does not "
+				"have the md/array_size attribute, "
+				"the array may be larger than expected\n");
+		} else {
+			rv |= rc;
+		}
+	}
+
+	if (info->array.level > 0)
+		rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start);
+
+	return rv;
+}
+
+/*
+ * Add the device described by 'sd' to the array 'sra' through sysfs.
+ *
+ * The device is announced by writing "major:minor" to "new_dev", its
+ * name is derived from the /sys/dev/block/<major>:<minor> symlink and
+ * stored in sd->sys_name as "dev-<name>", and then offset, size and
+ * (for non-container arrays) state, slot and recovery_start are written.
+ *
+ * 'resume' requests that "recovery_start" be written; when it is set and
+ * the attribute turns out not to be writable the device is removed
+ * again and -1 is returned.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
+{
+	char dv[100];
+	/* sysfs link targets can be long on deep device topologies */
+	char nm[1024];
+	char *dname;
+	int rv;
+
+	snprintf(dv, sizeof(dv), "%d:%d", sd->disk.major, sd->disk.minor);
+	rv = sysfs_set_str(sra, NULL, "new_dev", dv);
+	if (rv)
+		return rv;
+
+	snprintf(dv, sizeof(dv), "/sys/dev/block/%d:%d",
+		 sd->disk.major, sd->disk.minor);
+	/* readlink() does not terminate the result: keep one byte spare.
+	 * A completely filled buffer means the target may have been
+	 * truncated, in which case its last component cannot be trusted.
+	 */
+	rv = readlink(dv, nm, sizeof(nm) - 1);
+	if (rv <= 0 || rv >= (int)sizeof(nm) - 1)
+		return -1;
+	nm[rv] = '\0';
+
+	/* the device name is the last path component of the link target;
+	 * a target without any '/' is taken as the name itself
+	 */
+	dname = strrchr(nm, '/');
+	dname = dname ? dname + 1 : nm;
+	/* NOTE(review): the capacity of sd->sys_name is not visible here;
+	 * confirm it can hold "dev-" plus the longest device name.
+	 */
+	strcpy(sd->sys_name, "dev-");
+	strcpy(sd->sys_name+4, dname);
+
+	/* test write to see if 'recovery_start' is available */
+	if (resume && sd->recovery_start < MaxSector &&
+	    sysfs_set_num(sra, sd, "recovery_start", 0)) {
+		sysfs_set_str(sra, sd, "state", "remove");
+		return -1;
+	}
+
+	rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
+	rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
+	if (sra->array.level != LEVEL_CONTAINER) {
+		if (sd->recovery_start == MaxSector)
+			/* This can correctly fail if array isn't started,
+			 * yet, so just ignore status for now.
+			 */
+			sysfs_set_str(sra, sd, "state", "insync");
+		rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+		if (resume)
+			sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
+	}
+	return rv;
+}
+
+#if 0
+/*
+ * Currently compiled out.
+ *
+ * From an open block device, try to find and open its corresponding
+ * scsi_generic interface.
+ *
+ * Returns an open read-only descriptor on a temporary character node
+ * for the scsi_generic device, or a negative value on failure.
+ */
+int sysfs_disk_to_sg(int fd)
+{
+	static const char prefix[] = "scsi_generic:";
+	struct stat st;
+	char path[256];
+	char sg_name[256];
+	char sg_path[600];
+	char sg_major_minor[32];
+	char *c;
+	DIR *dir;
+	struct dirent *de;
+	int sg_major, sg_minor, rv;
+	int found = 0;
+	int sg_fd;
+
+	if (fstat(fd, &st))
+		return -1;
+
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+		 major(st.st_rdev), minor(st.st_rdev));
+
+	dir = opendir(path);
+	if (!dir)
+		return -1;
+
+	while ((de = readdir(dir)) != NULL) {
+		if (strncmp(prefix, de->d_name, strlen(prefix)) == 0) {
+			/* copy the name: the dirent must not be used
+			 * once the directory stream is closed
+			 */
+			snprintf(sg_name, sizeof(sg_name), "%s", de->d_name);
+			found = 1;
+			break;
+		}
+	}
+	closedir(dir);
+
+	if (!found)
+		return -1;
+
+	snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, sg_name);
+	sg_fd = open(sg_path, O_RDONLY);
+	if (sg_fd < 0)
+		return sg_fd;
+
+	rv = read(sg_fd, sg_major_minor, sizeof(sg_major_minor) - 1);
+	close(sg_fd);
+	if (rv <= 0)
+		return -1;
+	sg_major_minor[rv] = '\0';
+
+	/* expected content is "<major>:<minor>\n" */
+	c = strchr(sg_major_minor, ':');
+	if (!c)
+		return -1;
+	*c = '\0';
+	c++;
+	sg_major = strtol(sg_major_minor, NULL, 10);
+	sg_minor = strtol(c, NULL, 10);
+	snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
+		 (int) getpid(), sg_major, sg_minor);
+	if (mknod(path, S_IFCHR|0600, makedev(sg_major, sg_minor))==0) {
+		/* the node is only needed long enough to open it */
+		sg_fd = open(path, O_RDONLY);
+		unlink(path);
+		return sg_fd;
+	}
+
+	return -1;
+}
+#endif
+
+/*
+ * From an open block device, try to retrieve its scsi_id.
+ *
+ * The id is taken from the "scsi_disk:<a>:<b>:<c>:<d>" entry found in
+ * /sys/dev/block/<major>:<minor>/device and packed into '*id' as
+ * a << 24 | b << 16 | c << 8 | d.
+ *
+ * Returns 0 on success.  Returns 1, leaving '*id' untouched, when the
+ * device cannot be examined, has no scsi_disk entry, or the entry does
+ * not carry four ':'-separated fields.
+ */
+int sysfs_disk_to_scsi_id(int fd, __u32 *id)
+{
+	static const char prefix[] = "scsi_disk:";
+	/* the original labels the fields host, channel, lun, id.
+	 * NOTE(review): sysfs SCSI addresses are usually written
+	 * host:channel:target:lun - confirm the last two labels.
+	 */
+	static const int shift[4] = { 24, 16, 8, 0 };
+	struct stat st;
+	char path[256];
+	char addr[256];
+	char *field;
+	DIR *dir;
+	struct dirent *de;
+	__u32 scsi_id = 0;
+	int found = 0;
+	int i;
+
+	if (fstat(fd, &st))
+		return 1;
+
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+		 major(st.st_rdev), minor(st.st_rdev));
+
+	dir = opendir(path);
+	if (!dir)
+		return 1;
+
+	while ((de = readdir(dir)) != NULL) {
+		if (strncmp(prefix, de->d_name, strlen(prefix)) == 0) {
+			/* keep a private copy of the address part: the
+			 * dirent must not be touched after closedir()
+			 */
+			snprintf(addr, sizeof(addr), "%s",
+				 de->d_name + strlen(prefix));
+			found = 1;
+			break;
+		}
+	}
+	closedir(dir);
+
+	if (!found)
+		return 1;
+
+	field = addr;
+	for (i = 0; i < 4; i++) {
+		char *sep = NULL;
+
+		if (i < 3) {
+			/* the first three fields must end in ':' */
+			sep = strchr(field, ':');
+			if (!sep)
+				return 1;
+			*sep = '\0';
+		}
+		/* convert before shifting so the shift is unsigned */
+		scsi_id |= (__u32)strtol(field, NULL, 10) << shift[i];
+		if (sep)
+			field = sep + 1;
+	}
+
+	*id = scsi_id;
+	return 0;
+}
+
+
+/*
+ * Check that devnum is a holder of rdev,
+ * and is the only holder.
+ * we should be locked against races by
+ * an O_EXCL on devnum
+ *
+ * Every entry of /sys/dev/block/<major>:<minor>/holders is examined by
+ * reading its "dev" attribute ("<major>:<minor>\n").
+ *
+ * Returns 1 when at least one holder exists and all holders map to
+ * 'devnum', 0 otherwise.  errno is set to EEXIST when a different
+ * holder is found and to ENOENT when the holders directory or a
+ * holder's "dev" attribute cannot be opened, read or parsed.
+ */
+int sysfs_unique_holder(int devnum, long rdev)
+{
+	DIR *dir;
+	struct dirent *de;
+	char holders[100];
+	/* room for the holders directory, a full d_name and "/dev" */
+	char path[512];
+	int found = 0;
+	int failed = 0;
+
+	snprintf(holders, sizeof(holders), "/sys/dev/block/%d:%d/holders",
+		 (int)major(rdev), (int)minor(rdev));
+	dir = opendir(holders);
+	errno = ENOENT;
+	if (!dir)
+		return 0;
+
+	while ((de = readdir(dir)) != NULL) {
+		/* large enough for any "<major>:<minor>\n" of two ints */
+		char buf[32];
+		int n;
+		int mj, mn;
+		char c;
+		int fd;
+
+		if (de->d_ino == 0)
+			continue;
+		if (de->d_name[0] == '.')
+			continue;
+
+		n = snprintf(path, sizeof(path), "%s/%s/dev",
+			     holders, de->d_name);
+		if (n < 0 || (size_t)n >= sizeof(path)) {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		n = read(fd, buf, sizeof(buf)-1);
+		close(fd);
+		if (n < 0) {
+			/* never index buf with a negative length */
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		buf[n] = 0;
+		if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
+		    c != '\n') {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		/* holders not on MD_MAJOR are numbered -1-(minor>>6) */
+		if (mj != MD_MAJOR)
+			mn = -1-(mn>>6);
+
+		if (devnum != mn) {
+			errno = EEXIST;
+			failed = 1;
+			break;
+		}
+		found = 1;
+	}
+	closedir(dir);
+
+	return failed ? 0 : found;
+}
+
+#ifndef MDASSEMBLE
+
+/*
+ * NULL-terminated list of "array_state" words that WaitClean() looks up
+ * with sysfs_match_word() to decide that it can stop waiting.
+ */
+static char *clean_states[] = {
+	"clear",
+	"inactive",
+	"readonly",
+	"read-auto",
+	"clean",
+	NULL
+};
+
+/*
+ * Poll the "array_state" attribute open on 'state_fd' until it reports
+ * one of clean_states, waiting up to 5s in total for that to happen.
+ *
+ * Returns 0 once a clean state is seen, -1 on a read/select failure or
+ * when the time is used up and the state is still not clean.
+ */
+static int wait_for_clean_state(int state_fd)
+{
+	char buf[20];
+	fd_set fds;
+	struct timeval tm;
+	int timed_out = 0;
+	int n;
+
+	tm.tv_sec = 5;
+	tm.tv_usec = 0;
+	FD_ZERO(&fds);
+
+	for (;;) {
+		/* keep one byte spare so the word is always terminated */
+		n = read(state_fd, buf, sizeof(buf) - 1);
+		if (n < 0)
+			return -1;
+		buf[n] = '\0';
+		/* NOTE(review): presumably sysfs_match_word() returns the
+		 * index of the matching word, so <= 4 means "found" - confirm.
+		 */
+		if (sysfs_match_word(buf, clean_states) <= 4)
+			return 0;
+		/* the state was re-read once after the timeout: give up
+		 * rather than spinning on an exhausted timeout
+		 */
+		if (timed_out)
+			return -1;
+
+		FD_SET(state_fd, &fds);
+		n = select(state_fd + 1, NULL, NULL, &fds, &tm);
+		if (n < 0 && errno != EINTR)
+			return -1;
+		if (n == 0)
+			timed_out = 1;
+		lseek(state_fd, 0, SEEK_SET);
+	}
+}
+
+/*
+ * Wait for the array behind 'dev' to reach a clean state.
+ *
+ * 'sock' is handed to fping_monitor(); 'verbose' enables error messages
+ * on stderr.
+ *
+ * Returns 0 when no waiting is needed (level linear/multipath/0, the
+ * array is not a subarray, or safe mode is disabled), when the sysfs
+ * attributes cannot be read, or when the array became clean and the
+ * monitor answered a ping.  Returns 1 when 'dev' cannot be opened or
+ * waiting failed.
+ */
+int WaitClean(char *dev, int sock, int verbose)
+{
+	int fd;
+	struct mdinfo *mdi;
+	int rv = 1;
+	int devnum;
+
+	fd = open(dev, O_RDONLY);
+	if (fd < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": Couldn't open %s: %s\n", dev, strerror(errno));
+		return 1;
+	}
+
+	devnum = fd2devnum(fd);
+	mdi = sysfs_read(fd, devnum, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
+	if (!mdi) {
+		if (verbose)
+			fprintf(stderr, Name ": Failed to read sysfs attributes for "
+				"%s\n", dev);
+		close(fd);
+		return 0;
+	}
+
+	switch (mdi->array.level) {
+	case LEVEL_LINEAR:
+	case LEVEL_MULTIPATH:
+	case 0:
+		/* safemode delay is irrelevant for these levels */
+		rv = 0;
+		break;
+	default:
+		break;
+	}
+
+	/* for internal metadata the kernel handles the final clean
+	 * transition, containers can never be dirty
+	 */
+	if (!is_subarray(mdi->text_version))
+		rv = 0;
+
+	/* safemode disabled ? */
+	if (mdi->safe_mode_delay == 0)
+		rv = 0;
+
+	if (rv) {
+		int state_fd = sysfs_open(devnum, NULL, "array_state");
+
+		if (state_fd < 0) {
+			/* nothing to watch: fail without touching the array */
+			rv = 1;
+		} else {
+			/* minimize the safe_mode_delay while waiting for
+			 * writes to quiesce
+			 */
+			sysfs_set_safemode(mdi, 1);
+
+			/* give mdmon a chance to checkpoint resync */
+			sysfs_set_str(mdi, NULL, "sync_action", "idle");
+
+			if (wait_for_clean_state(state_fd) < 0)
+				rv = 1;
+			else if (fping_monitor(sock) == 0 ||
+				 ping_monitor(mdi->text_version) == 0) {
+				/* we need to ping to close the window between array
+				 * state transitioning to clean and the metadata being
+				 * marked clean
+				 */
+				rv = 0;
+			} else
+				rv = 1;
+
+			/* restore the original safe_mode_delay */
+			sysfs_set_safemode(mdi, mdi->safe_mode_delay);
+			close(state_fd);
+		}
+
+		if (rv && verbose)
+			fprintf(stderr, Name ": Error waiting for %s to be clean\n",
+				dev);
+	}
+
+	sysfs_free(mdi);
+	close(fd);
+
+	return rv;
+}
+#endif /* MDASSEMBLE */