git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - sysfs.c
mdadm: check value returned by snprintf against errors
[thirdparty/mdadm.git] / sysfs.c
diff --git a/sysfs.c b/sysfs.c
index 7a0403d635b7f2067fa48ecb50e9ba326f5a8037..2995713d644d572a447cacdddb1b5b2e043f98a5 100644 (file)
--- a/sysfs.c
+++ b/sysfs.c
 #include       "mdadm.h"
 #include       <dirent.h>
 #include       <ctype.h>
-
-int load_sys(char *path, char *buf)
+#include       "dlink.h"
+
+#define MAX_SYSFS_PATH_LEN     120
+
+struct dev_sysfs_rule {
+       struct dev_sysfs_rule *next;
+       char *devname;
+       int uuid[4];
+       int uuid_set;
+       struct sysfs_entry {
+               struct sysfs_entry *next;
+               char *name;
+               char *value;
+       } *entry;
+};
+
+int load_sys(char *path, char *buf, int len)
 {
        int fd = open(path, O_RDONLY);
        int n;
        if (fd < 0)
                return -1;
-       n = read(fd, buf, 1024);
+       n = read(fd, buf, len);
        close(fd);
-       if (n <0 || n >= 1024)
+       if (n <0 || n >= len)
                return -1;
        buf[n] = 0;
        if (n && buf[n-1] == '\n')
@@ -50,71 +65,77 @@ void sysfs_free(struct mdinfo *sra)
                while (sra->devs) {
                        struct mdinfo *d = sra->devs;
                        sra->devs = d->next;
+                       free(d->bb.entries);
                        free(d);
                }
+               free(sra->bb.entries);
                free(sra);
                sra = sra2;
        }
 }
 
-int sysfs_open(int devnum, char *devname, char *attr)
+int sysfs_open(char *devnm, char *devname, char *attr)
 {
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN]; /* strncat bounds keep 1 byte for NUL */
        int fd;
-       char *mdname = devnum2devname(devnum);
-
-       if (!mdname)
-               return -1;
 
-       sprintf(fname, "/sys/block/%s/md/", mdname);
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", devnm);
        if (devname) {
-               strcat(fname, devname);
-               strcat(fname, "/");
+               strncat(fname, devname, MAX_SYSFS_PATH_LEN - strlen(fname) - 1);
+               strncat(fname, "/", MAX_SYSFS_PATH_LEN - strlen(fname) - 1);
        }
-       strcat(fname, attr);
+       strncat(fname, attr, MAX_SYSFS_PATH_LEN - strlen(fname) - 1);
        fd = open(fname, O_RDWR);
        if (fd < 0 && errno == EACCES)
                fd = open(fname, O_RDONLY);
-       free(mdname);
        return fd;
 }
 
-void sysfs_init(struct mdinfo *mdi, int fd, int devnum)
+void sysfs_init_dev(struct mdinfo *mdi, dev_t devid)
 {
-       mdi->sys_name[0] = 0;
-       if (fd >= 0) {
-               mdu_version_t vers;
-               if (ioctl(fd, RAID_VERSION, &vers) != 0)
-                       return;
-               devnum = fd2devnum(fd);
-       }
-       if (devnum == NoMdDev)
-               return;
-       if (devnum >= 0)
-               sprintf(mdi->sys_name, "md%d", devnum);
-       else
-               sprintf(mdi->sys_name, "md_d%d",
-                       -1-devnum);
+       snprintf(mdi->sys_name,
+                sizeof(mdi->sys_name), "dev-%s", devid2kname(devid));
 }
 
+int sysfs_init(struct mdinfo *mdi, int fd, char *devnm)
+{
+       struct stat stb;
+       char fname[MAX_SYSFS_PATH_LEN];
+       int retval = -ENODEV;
+
+       mdi->sys_name[0] = 0;
+       if (fd >= 0)
+               devnm = fd2devnm(fd);
+
+       if (devnm == NULL)
+               goto out;
 
-struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md", devnm);
+
+       if (stat(fname, &stb))
+               goto out;
+       if (!S_ISDIR(stb.st_mode))
+               goto out;
+       /* bounded copy: devnm is not guaranteed to fit sys_name */
+       snprintf(mdi->sys_name, sizeof(mdi->sys_name), "%s", devnm);
+       retval = 0;
+out:
+       return retval;
+}
+
+struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
 {
        char fname[PATH_MAX];
        char buf[PATH_MAX];
        char *base;
        char *dbase;
        struct mdinfo *sra;
-       struct mdinfo *dev;
+       struct mdinfo *dev, **devp;
        DIR *dir = NULL;
        struct dirent *de;
 
-       sra = malloc(sizeof(*sra));
-       if (sra == NULL)
-               return sra;
-       memset(sra, 0, sizeof(*sra));
-       sysfs_init(sra, fd, devnum);
-       if (sra->sys_name[0] == 0) {
+       sra = xcalloc(1, sizeof(*sra));
+       if (sysfs_init(sra, fd, devnm)) {
                free(sra);
                return NULL;
        }
@@ -125,7 +146,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
        sra->devs = NULL;
        if (options & GET_VERSION) {
                strcpy(base, "metadata_version");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                if (strncmp(buf, "none", 4) == 0) {
                        sra->array.major_version =
@@ -144,31 +165,25 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
        }
        if (options & GET_LEVEL) {
                strcpy(base, "level");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->array.level = map_name(pers, buf);
        }
        if (options & GET_LAYOUT) {
                strcpy(base, "layout");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->array.layout = strtoul(buf, NULL, 0);
        }
-       if (options & GET_DISKS) {
+       if (options & (GET_DISKS|GET_STATE)) {
                strcpy(base, "raid_disks");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->array.raid_disks = strtoul(buf, NULL, 0);
        }
-       if (options & GET_DEGRADED) {
-               strcpy(base, "degraded");
-               if (load_sys(fname, buf))
-                       goto abort;
-               sra->array.failed_disks = strtoul(buf, NULL, 0);
-       }
        if (options & GET_COMPONENT) {
                strcpy(base, "component_size");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->component_size = strtoull(buf, NULL, 0);
                /* sysfs reports "K", but we want sectors */
@@ -176,19 +191,21 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
        }
        if (options & GET_CHUNK) {
                strcpy(base, "chunk_size");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->array.chunk_size = strtoul(buf, NULL, 0);
        }
        if (options & GET_CACHE) {
                strcpy(base, "stripe_cache_size");
-               if (load_sys(fname, buf))
-                       goto abort;
-               sra->cache_size = strtoul(buf, NULL, 0);
+               if (load_sys(fname, buf, sizeof(buf)))
+                       /* Probably level doesn't support it */
+                       sra->cache_size = 0;
+               else
+                       sra->cache_size = strtoul(buf, NULL, 0);
        }
        if (options & GET_MISMATCH) {
                strcpy(base, "mismatch_cnt");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
                sra->mismatch_cnt = strtoul(buf, NULL, 0);
        }
@@ -200,7 +217,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
                size_t len;
 
                strcpy(base, "safe_mode_delay");
-               if (load_sys(fname, buf))
+               if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
 
                /* remove a period, and count digits after it */
@@ -221,6 +238,35 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
                msec = (msec * 1000) / scale;
                sra->safe_mode_delay = msec;
        }
+       if (options & GET_BITMAP_LOCATION) {
+               strcpy(base, "bitmap/location");
+               if (load_sys(fname, buf, sizeof(buf)))
+                       goto abort;
+               if (strncmp(buf, "file", 4) == 0)
+                       sra->bitmap_offset = 1;
+               else if (strncmp(buf, "none", 4) == 0)
+                       sra->bitmap_offset = 0;
+               else if (buf[0] == '+')
+                       sra->bitmap_offset = strtol(buf+1, NULL, 10);
+               else
+                       goto abort;
+       }
+
+       if (options & GET_ARRAY_STATE) {
+               strcpy(base, "array_state");
+               if (load_sys(fname, buf, sizeof(buf)))
+                       goto abort;
+               sra->array_state = map_name(sysfs_array_states, buf);
+       }
+
+       if (options & GET_CONSISTENCY_POLICY) {
+               strcpy(base, "consistency_policy");
+               if (load_sys(fname, buf, sizeof(buf)))
+                       sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
+               else
+                       sra->consistency_policy = map_name(consistency_policies,
+                                                          buf);
+       }
 
        if (! (options & GET_DEVS))
                return sra;
@@ -231,7 +277,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
        if (!dir)
                goto abort;
        sra->array.spare_disks = 0;
+       sra->array.active_disks = 0;
+       sra->array.failed_disks = 0;
+       sra->array.working_disks = 0;
 
+       devp = &sra->devs;
+       sra->devs = NULL;
        while ((de = readdir(dir)) != NULL) {
                char *ep;
                if (de->d_ino == 0 ||
@@ -241,13 +292,11 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
                dbase = base + strlen(base);
                *dbase++ = '/';
 
-               dev = malloc(sizeof(*dev));
-               if (!dev)
-                       goto abort;
+               dev = xcalloc(1, sizeof(*dev));
 
                /* Always get slot, major, minor */
                strcpy(dbase, "slot");
-               if (load_sys(fname, buf)) {
+               if (load_sys(fname, buf, sizeof(buf))) {
                        /* hmm... unable to read 'slot' maybe the device
                         * is going away?
                         */
@@ -265,65 +314,87 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
                                free(dev);
                                goto abort;
                        }
-                       
+
                }
                strcpy(dev->sys_name, de->d_name);
                dev->disk.raid_disk = strtoul(buf, &ep, 10);
                if (*ep) dev->disk.raid_disk = -1;
 
+               sra->array.nr_disks++;
                strcpy(dbase, "block/dev");
-               if (load_sys(fname, buf)) {
+               if (load_sys(fname, buf, sizeof(buf))) {
                        /* assume this is a stale reference to a hot
                         * removed device
                         */
-                       free(dev);
-                       continue;
+                       if (!(options & GET_DEVS_ALL)) {
+                               free(dev);
+                               continue;
+                       }
+               } else {
+                       sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
                }
-               sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
 
-               /* special case check for block devices that can go 'offline' */
-               strcpy(dbase, "block/device/state");
-               if (load_sys(fname, buf) == 0 &&
-                   strncmp(buf, "offline", 7) == 0) {
-                       free(dev);
-                       continue;
+               if (!(options & GET_DEVS_ALL)) {
+                       /* special case check for block devices that can go 'offline' */
+                       strcpy(dbase, "block/device/state");
+                       if (load_sys(fname, buf, sizeof(buf)) == 0 &&
+                           strncmp(buf, "offline", 7) == 0) {
+                               free(dev);
+                               continue;
+                       }
                }
 
                /* finally add this disk to the array */
-               dev->next = sra->devs;
-               sra->devs = dev;
+               *devp = dev;
+               devp = & dev->next;
+               dev->next = NULL;
 
                if (options & GET_OFFSET) {
                        strcpy(dbase, "offset");
-                       if (load_sys(fname, buf))
+                       if (load_sys(fname, buf, sizeof(buf)))
                                goto abort;
                        dev->data_offset = strtoull(buf, NULL, 0);
+                       strcpy(dbase, "new_offset");
+                       if (load_sys(fname, buf, sizeof(buf)) == 0)
+                               dev->new_data_offset = strtoull(buf, NULL, 0);
+                       else
+                               dev->new_data_offset = dev->data_offset;
                }
                if (options & GET_SIZE) {
                        strcpy(dbase, "size");
-                       if (load_sys(fname, buf))
+                       if (load_sys(fname, buf, sizeof(buf)))
                                goto abort;
                        dev->component_size = strtoull(buf, NULL, 0) * 2;
                }
                if (options & GET_STATE) {
                        dev->disk.state = 0;
                        strcpy(dbase, "state");
-                       if (load_sys(fname, buf))
+                       if (load_sys(fname, buf, sizeof(buf)))
                                goto abort;
-                       if (strstr(buf, "in_sync"))
-                               dev->disk.state |= (1<<MD_DISK_SYNC);
                        if (strstr(buf, "faulty"))
                                dev->disk.state |= (1<<MD_DISK_FAULTY);
-                       if (dev->disk.state == 0)
-                               sra->array.spare_disks++;
+                       else {
+                               sra->array.working_disks++;
+                               if (strstr(buf, "in_sync")) {
+                                       dev->disk.state |= (1<<MD_DISK_SYNC);
+                                       sra->array.active_disks++;
+                               }
+                               if (dev->disk.state == 0)
+                                       sra->array.spare_disks++;
+                       }
                }
                if (options & GET_ERROR) {
                        strcpy(buf, "errors");
-                       if (load_sys(fname, buf))
+                       if (load_sys(fname, buf, sizeof(buf)))
                                goto abort;
                        dev->errors = strtoul(buf, NULL, 0);
                }
        }
+
+       if ((options & GET_STATE) && sra->array.raid_disks)
+               sra->array.failed_disks = sra->array.raid_disks -
+                       sra->array.active_disks - sra->array.spare_disks;
+
        closedir(dir);
        return sra;
 
@@ -369,21 +440,18 @@ unsigned long long get_component_size(int fd)
         * This returns in units of sectors.
         */
        struct stat stb;
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN];
        int n;
-       if (fstat(fd, &stb)) return 0;
-       if (major(stb.st_rdev) != (unsigned)get_mdp_major())
-               sprintf(fname, "/sys/block/md%d/md/component_size",
-                       (int)minor(stb.st_rdev));
-       else
-               sprintf(fname, "/sys/block/md_d%d/md/component_size",
-                       (int)minor(stb.st_rdev)>>MdpMinorShift);
+       if (fstat(fd, &stb))
+               return 0;
+       snprintf(fname, MAX_SYSFS_PATH_LEN,
+                "/sys/block/%s/md/component_size", stat2devnm(&stb));
        fd = open(fname, O_RDONLY);
        if (fd < 0)
                return 0;
        n = read(fd, fname, sizeof(fname));
        close(fd);
-       if (n == sizeof(fname))
+       if (n < 0 || n == sizeof(fname))
                return 0;
        fname[n] = 0;
        return strtoull(fname, NULL, 10) * 2;
@@ -392,11 +460,11 @@ unsigned long long get_component_size(int fd)
 int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
                  char *name, char *val)
 {
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN];
        unsigned int n;
        int fd;
 
-       sprintf(fname, "/sys/block/%s/md/%s/%s",
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
                sra->sys_name, dev?dev->sys_name:"", name);
        fd = open(fname, O_WRONLY);
        if (fd < 0)
@@ -404,7 +472,7 @@ int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
        n = write(fd, val, strlen(val));
        close(fd);
        if (n != strlen(val)) {
-               dprintf(Name ": failed to write '%s' to '%s' (%s)\n",
+               dprintf("failed to write '%s' to '%s' (%s)\n",
                        val, fname, strerror(errno));
                return -1;
        }
@@ -419,28 +487,41 @@ int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
        return sysfs_set_str(sra, dev, name, valstr);
 }
 
+int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev,
+                        char *name, long long val)
+{
+       char valstr[50];
+       sprintf(valstr, "%lli", val);
+       return sysfs_set_str(sra, dev, name, valstr);
+}
+
 int sysfs_uevent(struct mdinfo *sra, char *event)
 {
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN];
        int n;
        int fd;
 
-       sprintf(fname, "/sys/block/%s/uevent",
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/uevent",
                sra->sys_name);
        fd = open(fname, O_WRONLY);
        if (fd < 0)
                return -1;
        n = write(fd, event, strlen(event));
        close(fd);
+       if (n != (int)strlen(event)) {
+               dprintf("failed to write '%s' to '%s' (%s)\n",
+                       event, fname, strerror(errno));
+               return -1;
+       }
        return 0;
-}      
+}
 
 int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name)
 {
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN];
        struct stat st;
 
-       sprintf(fname, "/sys/block/%s/md/%s/%s",
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
                sra->sys_name, dev?dev->sys_name:"", name);
 
        return stat(fname, &st) == 0;
@@ -449,10 +530,10 @@ int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name
 int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
                       char *name)
 {
-       char fname[50];
+       char fname[MAX_SYSFS_PATH_LEN];
        int fd;
 
-       sprintf(fname, "/sys/block/%s/md/%s/%s",
+       snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
                sra->sys_name, dev?dev->sys_name:"", name);
        fd = open(fname, O_RDWR);
        if (fd < 0)
@@ -468,8 +549,8 @@ int sysfs_fd_get_ll(int fd, unsigned long long *val)
 
        lseek(fd, 0, 0);
        n = read(fd, buf, sizeof(buf));
-       if (n <= 0)
-               return -1;
+       if (n <= 0 || n == sizeof(buf))
+               return -2;
        buf[n] = 0;
        *val = strtoull(buf, &ep, 0);
        if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
@@ -491,13 +572,56 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
        return n;
 }
 
+int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2)
+{
+       /* two numbers in this sysfs file, either
+        *  NNN (NNN)
+        * or
+        *  NNN / NNN
+        */
+       char buf[80];
+       int n;
+       char *ep, *ep2;
+
+       lseek(fd, 0, 0);
+       n = read(fd, buf, sizeof(buf));
+       if (n <= 0 || n == sizeof(buf))
+               return -2;
+       buf[n] = 0;
+       *v1 = strtoull(buf, &ep, 0);
+       if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
+               return -1;
+       while (*ep == ' ' || *ep == '/' || *ep == '(')
+               ep++;
+       *v2 = strtoull(ep, &ep2, 0);
+       if (ep2 == ep || (*ep2 != 0 && *ep2 != '\n' && *ep2 != ' ' && *ep2 != ')')) {
+               *v2 = *v1;
+               return 1;
+       }
+       return 2;
+}
+
+int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
+                 char *name, unsigned long long *v1, unsigned long long *v2)
+{
+       int n;
+       int fd;
+
+       fd = sysfs_get_fd(sra, dev, name);
+       if (fd < 0)
+               return -1;
+       n = sysfs_fd_get_two(fd, v1, v2);
+       close(fd);
+       return n;
+}
+
 int sysfs_fd_get_str(int fd, char *val, int size)
 {
        int n;
 
        lseek(fd, 0, 0);
        n = read(fd, val, size);
-       if (n <= 0)
+       if (n <= 0 || n == size)
                return -1;
        val[n] = 0;
        return n;
@@ -535,17 +659,30 @@ int sysfs_set_array(struct mdinfo *info, int vers)
 {
        int rv = 0;
        char ver[100];
+       int raid_disks = info->array.raid_disks;
 
        ver[0] = 0;
        if (info->array.major_version == -1 &&
            info->array.minor_version == -2) {
+               char buf[1024];
+
                strcat(strcpy(ver, "external:"), info->text_version);
 
+               /* meta version might already be set if we are setting
+                * new geometry for a reshape.  In that case we don't
+                * want to over-write the 'readonly' flag that is
+                * stored in the metadata version.  So read the current
+                * version first, and preserve the flag
+                */
+               if (sysfs_get_str(info, NULL, "metadata_version",
+                                 buf, 1024) > 0)
+                       if (strlen(buf) >= 9 && buf[9] == '-')
+                               ver[9] = '-';
+
                if ((vers % 100) < 2 ||
                    sysfs_set_str(info, NULL, "metadata_version",
                                  ver) < 0) {
-                       fprintf(stderr, Name ": This kernel does not "
-                               "support external metadata.\n");
+                       pr_err("This kernel does not support external metadata.\n");
                        return 1;
                }
        }
@@ -553,7 +690,9 @@ int sysfs_set_array(struct mdinfo *info, int vers)
                return 0; /* FIXME */
        rv |= sysfs_set_str(info, NULL, "level",
                            map_num(pers, info->array.level));
-       rv |= sysfs_set_num(info, NULL, "raid_disks", info->array.raid_disks);
+       if (info->reshape_active && info->delta_disks != UnSet)
+               raid_disks -= info->delta_disks;
+       rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
        rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size);
        rv |= sysfs_set_num(info, NULL, "layout", info->array.layout);
        rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
@@ -563,9 +702,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
                rc = sysfs_set_num(info, NULL, "array_size",
                                   info->custom_array_size/2);
                if (rc && errno == ENOENT) {
-                       fprintf(stderr, Name ": This kernel does not "
-                               "have the md/array_size attribute, "
-                               "the array may be larger than expected\n");
+                       pr_err("This kernel does not have the md/array_size attribute, the array may be larger than expected\n");
                        rc = 0;
                }
                rv |= rc;
@@ -573,6 +710,28 @@ int sysfs_set_array(struct mdinfo *info, int vers)
 
        if (info->array.level > 0)
                rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start);
+
+       if (info->reshape_active) {
+               rv |= sysfs_set_num(info, NULL, "reshape_position",
+                                   info->reshape_progress);
+               rv |= sysfs_set_num(info, NULL, "chunk_size", info->new_chunk);
+               rv |= sysfs_set_num(info, NULL, "layout", info->new_layout);
+               rv |= sysfs_set_num(info, NULL, "raid_disks",
+                                   info->array.raid_disks);
+               /* We don't set 'new_level' here.  That can only happen
+                * once the reshape completes.
+                */
+       }
+
+       if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
+               if (sysfs_set_str(info, NULL, "consistency_policy",
+                                 map_num(consistency_policies,
+                                         info->consistency_policy))) {
+                       pr_err("This kernel does not support PPL. Falling back to consistency-policy=resync.\n");
+                       info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+               }
+       }
+
        return rv;
 }
 
@@ -582,6 +741,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
        char nm[PATH_MAX];
        char *dname;
        int rv;
+       int i;
 
        sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
        rv = sysfs_set_str(sra, NULL, "new_dev", dv);
@@ -589,13 +749,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
                return rv;
 
        memset(nm, 0, sizeof(nm));
-       sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor);
-       rv = readlink(dv, nm, sizeof(nm));
-       if (rv <= 0)
-               return -1;
-       nm[rv] = '\0';
-       dname = strrchr(nm, '/');
-       if (dname) dname++;
+       dname = devid2kname(makedev(sd->disk.major, sd->disk.minor));
        strcpy(sd->sys_name, "dev-");
        strcpy(sd->sys_name+4, dname);
 
@@ -609,6 +763,10 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
        rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
        rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
        if (sra->array.level != LEVEL_CONTAINER) {
+               if (sra->consistency_policy == CONSISTENCY_POLICY_PPL) {
+                       rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
+                       rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
+               }
                if (sd->recovery_start == MaxSector)
                        /* This can correctly fail if array isn't started,
                         * yet, so just ignore status for now.
@@ -619,6 +777,28 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
                if (resume)
                        sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
        }
+       if (sd->bb.supported) {
+               if (sysfs_set_str(sra, sd, "state", "external_bbl")) {
+                       /*
+                        * backward compatibility - if kernel doesn't support
+                        * bad blocks for external metadata, let it continue
+                        * as long as there are none known so far
+                        */
+                       if (sd->bb.count) {
+                               pr_err("The kernel has no support for bad blocks in external metadata\n");
+                               return -1;
+                       }
+               }
+
+               for (i = 0; i < sd->bb.count; i++) {
+                       char s[30];
+                       const struct md_bb_entry *entry = &sd->bb.entries[i];
+
+                       snprintf(s, sizeof(s) - 1, "%llu %d\n", entry->sector,
+                                entry->length);
+                       rv |= sysfs_set_str(sra, sd, "bad_blocks", s);
+               }
+       }
        return rv;
 }
 
@@ -631,7 +811,7 @@ int sysfs_disk_to_sg(int fd)
        struct stat st;
        char path[256];
        char sg_path[256];
-       char sg_major_minor[8];
+       char sg_major_minor[10];
        char *c;
        DIR *dir;
        struct dirent *de;
@@ -666,7 +846,7 @@ int sysfs_disk_to_sg(int fd)
 
        rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
        close(fd);
-       if (rv < 0)
+       if (rv < 0 || rv == sizeof(sg_major_minor))
                return -1;
        else
                sg_major_minor[rv - 1] = '\0';
@@ -693,77 +873,66 @@ int sysfs_disk_to_scsi_id(int fd, __u32 *id)
        /* from an open block device, try to retrieve it scsi_id */
        struct stat st;
        char path[256];
-       char *c1, *c2;
        DIR *dir;
        struct dirent *de;
+       int host, bus, target, lun;
 
        if (fstat(fd, &st))
                return 1;
 
-       snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+       snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device/scsi_device",
                 major(st.st_rdev), minor(st.st_rdev));
 
        dir = opendir(path);
        if (!dir)
                return 1;
 
-       de = readdir(dir);
-       while (de) {
-               if (strncmp("scsi_disk:", de->d_name,
-                           strlen("scsi_disk:")) == 0)
+       for (de = readdir(dir); de; de = readdir(dir)) {
+               int count;
+
+               if (de->d_type != DT_DIR)
+                       continue;
+
+               count = sscanf(de->d_name, "%d:%d:%d:%d", &host, &bus, &target, &lun);
+               if (count == 4)
                        break;
-               de = readdir(dir);
        }
        closedir(dir);
 
        if (!de)
                return 1;
 
-       c1 = strchr(de->d_name, ':');
-       c1++;
-       c2 = strchr(c1, ':');
-       *c2 = '\0';
-       *id = strtol(c1, NULL, 10) << 24; /* host */
-       c1 = c2 + 1;
-       c2 = strchr(c1, ':');
-       *c2 = '\0';
-       *id |= strtol(c1, NULL, 10) << 16; /* channel */
-       c1 = c2 + 1;
-       c2 = strchr(c1, ':');
-       *c2 = '\0';
-       *id |= strtol(c1, NULL, 10) << 8; /* lun */
-       c1 = c2 + 1;
-       *id |= strtol(c1, NULL, 10); /* id */
-
+       *id = (host << 24) | (bus << 16) | (target << 8) | (lun << 0);
        return 0;
 }
 
-
-int sysfs_unique_holder(int devnum, long rdev)
+int sysfs_unique_holder(char *devnm, long rdev)
 {
-       /* Check that devnum is a holder of rdev,
+       /* Check that devnm is a holder of rdev,
         * and is the only holder.
         * we should be locked against races by
-        * an O_EXCL on devnum
+        * an O_EXCL on devnm
+        * Return values:
+        *  0 - not unique, not even a holder
+        *  1 - unique, this is the only holder.
+        *  2/3 - not unique, there is another holder
+        * -1 - error, cannot find the holders
         */
        DIR *dir;
        struct dirent *de;
        char dirname[100];
        char l;
-       int found = 0;
+       int ret = 0;
        sprintf(dirname, "/sys/dev/block/%d:%d/holders",
                major(rdev), minor(rdev));
        dir = opendir(dirname);
-       errno = ENOENT;
        if (!dir)
-               return 0;
+               return -1;
        l = strlen(dirname);
        while ((de = readdir(dir)) != NULL) {
-               char buf[10];
+               char buf[100];
+               char *sl;
                int n;
-               int mj, mn;
-               char c;
-               int fd;
 
                if (de->d_ino == 0)
                        continue;
@@ -771,41 +940,28 @@ int sysfs_unique_holder(int devnum, long rdev)
                        continue;
                strcpy(dirname+l, "/");
                strcat(dirname+l, de->d_name);
-               strcat(dirname+l, "/dev");
-               fd = open(dirname, O_RDONLY);
-               if (fd < 0) {
-                       errno = ENOENT;
-                       break;
-               }
-               n = read(fd, buf, sizeof(buf)-1);
-               close(fd);
+               n = readlink(dirname, buf, sizeof(buf)-1);
+               if (n <= 0)
+                       continue;
                buf[n] = 0;
-               if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
-                   c != '\n') {
-                       errno = ENOENT;
-                       break;
-               }
-               if (mj != MD_MAJOR)
-                       mn = -1-(mn>>6);
+               sl = strrchr(buf, '/');
+               if (!sl)
+                       continue;
+               sl++;
 
-               if (devnum != mn) {
-                       errno = EEXIST;
-                       break;
-               }
-               found = 1;
+               if (strcmp(devnm, sl) == 0)
+                       ret |= 1;
+               else
+                       ret |= 2;
        }
        closedir(dir);
-       if (de)
-               return 0;
-       else
-               return found;
+       return ret;
 }
 
 int sysfs_freeze_array(struct mdinfo *sra)
 {
        /* Try to freeze resync/rebuild on this array/container.
         * Return -1 if the array is busy,
-        * return -2 container cannot be frozen,
         * return 0 if this kernel doesn't support 'frozen'
         * return 1 if it worked.
         */
@@ -815,113 +971,197 @@ int sysfs_freeze_array(struct mdinfo *sra)
                return 1; /* no sync_action == frozen */
        if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
                return 0;
-       if (strcmp(buf, "idle\n") != 0 &&
-           strcmp(buf, "frozen\n") != 0)
+       if (strcmp(buf, "frozen\n") == 0)
+               /* Already frozen */
+               return 0;
+       if (strcmp(buf, "idle\n") != 0 && strcmp(buf, "recover\n") != 0)
                return -1;
        if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0)
                return 0;
        return 1;
 }
 
-#ifndef MDASSEMBLE
+int sysfs_wait(int fd, int *msec)
+{
+       /* Wait up to '*msec' milliseconds (decremented by the time spent) for
+        * an exception condition on fd.  If msec == NULL, wait indefinitely.
+        */
+       fd_set fds;
+       int n;
+       FD_ZERO(&fds);
+       FD_SET(fd, &fds);
+       if (msec == NULL)
+               n = select(fd+1, NULL, NULL, &fds, NULL);
+       else if (*msec < 0)
+               n = 0;
+       else {
+               struct timeval start, end, tv;
+               gettimeofday(&start, NULL);
+               if (*msec < 1000) {
+                       tv.tv_sec = 0;
+                       tv.tv_usec = (*msec)*1000;
+               } else {
+                       tv.tv_sec = (*msec)/1000;
+                       tv.tv_usec = 0;
+               }
+               n = select(fd+1, NULL, NULL, &fds, &tv);
+               gettimeofday(&end, NULL);
+               end.tv_sec -= start.tv_sec;
+               *msec -= (end.tv_sec * 1000 + end.tv_usec/1000
+                         - start.tv_usec/1000) + 1;
+       }
+       return n;
+}
+
+int sysfs_rules_apply_check(const struct mdinfo *sra,
+                           const struct sysfs_entry *ent)
+{
+       /* Check that parameter 'ent' exists and resolves (via realpath) to a
+        * path under /sys/block/<sys_name>/md/; file type is not checked.
+        */
+       char fname[MAX_SYSFS_PATH_LEN];
+       char dname[MAX_SYSFS_PATH_LEN];
+       char resolved_path[PATH_MAX];
+       char resolved_dir[PATH_MAX];
+       int result;
 
-static char *clean_states[] = {
-       "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+       if (sra == NULL || ent == NULL)
+               return -1;
+
+       result = snprintf(dname, MAX_SYSFS_PATH_LEN,
+                         "/sys/block/%s/md/", sra->sys_name);
+       if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+               return -1;
 
-int WaitClean(char *dev, int sock, int verbose)
+       result = snprintf(fname, MAX_SYSFS_PATH_LEN,
+                         "%s/%s", dname, ent->name);
+       if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+               return -1;
+
+       if (realpath(fname, resolved_path) == NULL ||
+           realpath(dname, resolved_dir) == NULL)
+               return -1;
+
+       if (strncmp(resolved_dir, resolved_path,
+                   strnlen(resolved_dir, PATH_MAX)) != 0)
+               return -1;
+
+       return 0;
+}
+
+static struct dev_sysfs_rule *sysfs_rules;
+
+void sysfs_rules_apply(char *devnm, struct mdinfo *dev)
 {
-       int fd;
-       struct mdinfo *mdi;
-       int rv = 1;
-       int devnum;
-
-       fd = open(dev, O_RDONLY); 
-       if (fd < 0) {
-               if (verbose)
-                       fprintf(stderr, Name ": Couldn't open %s: %s\n", dev, strerror(errno));
-               return 1;
-       }
+       struct dev_sysfs_rule *rules = sysfs_rules;
 
-       devnum = fd2devnum(fd);
-       mdi = sysfs_read(fd, devnum, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
-       if (!mdi) {
-               if (verbose)
-                       fprintf(stderr, Name ": Failed to read sysfs attributes for "
-                               "%s\n", dev);
-               close(fd);
-               return 0;
-       }
+       while (rules) {
+               struct sysfs_entry *ent = rules->entry;
+               int match  = 0;
 
-       switch(mdi->array.level) {
-       case LEVEL_LINEAR:
-       case LEVEL_MULTIPATH:
-       case 0:
-               /* safemode delay is irrelevant for these levels */
-               rv = 0;
-               
+               if (!rules->uuid_set) {
+                       if (rules->devname)
+                               match = strcmp(devnm, rules->devname) == 0;
+               } else {
+                       match = memcmp(dev->uuid, rules->uuid,
+                                      sizeof(int[4])) == 0;
+               }
+
+               while (match && ent) {
+                       if (sysfs_rules_apply_check(dev, ent) < 0)
+                               pr_err("SYSFS: failed to write '%s' to '%s'\n",
+                                       ent->value, ent->name);
+                       else
+                               sysfs_set_str(dev, NULL, ent->name, ent->value);
+                       ent = ent->next;
+               }
+               rules = rules->next;
        }
+}
 
-       /* for internal metadata the kernel handles the final clean
-        * transition, containers can never be dirty
-        */
-       if (!is_subarray(mdi->text_version))
-               rv = 0;
+static void sysfs_rule_free(struct dev_sysfs_rule *rule)
+{
+       struct sysfs_entry *entry;
 
-       /* safemode disabled ? */
-       if (mdi->safe_mode_delay == 0)
-               rv = 0;
+       while (rule) {
+               struct dev_sysfs_rule *tmp = rule->next;
 
-       if (rv) {
-               int state_fd = sysfs_open(fd2devnum(fd), NULL, "array_state");
-               char buf[20];
-               fd_set fds;
-               struct timeval tm;
+               entry = rule->entry;
+               while (entry) {
+                       struct sysfs_entry *tmp = entry->next;
 
-               /* minimize the safe_mode_delay and prepare to wait up to 5s
-                * for writes to quiesce
-                */
-               sysfs_set_safemode(mdi, 1);
-               tm.tv_sec = 5;
-               tm.tv_usec = 0;
-
-               FD_ZERO(&fds);
-
-               /* wait for array_state to be clean */
-               while (1) {
-                       rv = read(state_fd, buf, sizeof(buf));
-                       if (rv < 0)
-                               break;
-                       if (sysfs_match_word(buf, clean_states) <= 4)
-                               break;
-                       FD_SET(state_fd, &fds);
-                       rv = select(state_fd + 1, NULL, NULL, &fds, &tm);
-                       if (rv < 0 && errno != EINTR)
-                               break;
-                       lseek(state_fd, 0, SEEK_SET);
+                       free(entry->name);
+                       free(entry->value);
+                       free(entry);
+                       entry = tmp;
                }
-               if (rv < 0)
-                       rv = 1;
-               else if (fping_monitor(sock) == 0 ||
-                        ping_monitor(mdi->text_version) == 0) {
-                       /* we need to ping to close the window between array
-                        * state transitioning to clean and the metadata being
-                        * marked clean
-                        */
-                       rv = 0;
-               } else
-                       rv = 1;
-               if (rv && verbose)
-                       fprintf(stderr, Name ": Error waiting for %s to be clean\n",
-                               dev);
 
-               /* restore the original safe_mode_delay */
-               sysfs_set_safemode(mdi, mdi->safe_mode_delay);
-               close(state_fd);
+               if (rule->devname)
+                       free(rule->devname);
+               free(rule);
+               rule = tmp;
        }
+}
 
-       sysfs_free(mdi);
-       close(fd);
+void sysfsline(char *line)
+{
+       struct dev_sysfs_rule *sr;
+       char *w;
+
+       sr = xcalloc(1, sizeof(*sr));
+       for (w = dl_next(line); w != line ; w = dl_next(w)) {
+               if (strncasecmp(w, "name=", 5) == 0) {
+                       char *devname = w + 5;
+
+                       if (strncmp(devname, "/dev/md/", 8) == 0) {
+                               if (sr->devname)
+                                       pr_err("Only give one device per SYSFS line: %s\n",
+                                               devname);
+                               else
+                                       sr->devname = xstrdup(devname);
+                       } else {
+                               pr_err("%s is an invalid name for an md device - ignored.\n",
+                                      devname);
+                       }
+               } else if (strncasecmp(w, "uuid=", 5) == 0) {
+                       char *uuid = w + 5;
 
-       return rv;
+                       if (sr->uuid_set) {
+                               pr_err("Only give one uuid per SYSFS line: %s\n",
+                                       uuid);
+                       } else {
+                               if (parse_uuid(w + 5, sr->uuid) &&
+                                   memcmp(sr->uuid, uuid_zero,
+                                          sizeof(int[4])) != 0)
+                                       sr->uuid_set = 1;
+                               else
+                                       pr_err("Invalid uuid: %s\n", uuid);
+                       }
+               } else {
+                       struct sysfs_entry *prop;
+
+                       char *sep = strchr(w, '=');
+
+                       if (sep == NULL || *(sep + 1) == 0) {
+                               pr_err("Cannot parse \"%s\" - ignoring.\n", w);
+                               continue;
+                       }
+
+                       prop = xmalloc(sizeof(*prop));
+                       prop->value = xstrdup(sep + 1);
+                       *sep = 0;
+                       prop->name = xstrdup(w);
+                       prop->next = sr->entry;
+                       sr->entry = prop;
+               }
+       }
+
+       if (!sr->devname && !sr->uuid_set) {
+               pr_err("Device name not found in sysfs config entry - ignoring.\n");
+               sysfs_rule_free(sr);
+               return;
+       }
+
+       sr->next = sysfs_rules;
+       sysfs_rules = sr;
 }
-#endif /* MDASSEMBLE */