bf5c8c5db282d1d9a9f027d3524bea45597e8742
[thirdparty/mdadm.git] / sysfs.c
1 /*
2  * sysfs - extract md related information from sysfs.  Part of:
3  * mdadm - manage Linux "md" devices aka RAID arrays.
4  *
5  * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
6  *
7  *
8  *    This program is free software; you can redistribute it and/or modify
9  *    it under the terms of the GNU General Public License as published by
10  *    the Free Software Foundation; either version 2 of the License, or
11  *    (at your option) any later version.
12  *
13  *    This program is distributed in the hope that it will be useful,
14  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *    GNU General Public License for more details.
17  *
18  *    You should have received a copy of the GNU General Public License
19  *    along with this program; if not, write to the Free Software
20  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21  *
22  *    Author: Neil Brown
23  *    Email: <neilb@suse.de>
24  */
25
26 #include        "mdadm.h"
27 #include        <dirent.h>
28 #include        <ctype.h>
29
30 #define MAX_SYSFS_PATH_LEN      120
31
/*
 * Read the file at 'path' into 'buf' (capacity 'len' bytes) and make it
 * a NUL-terminated string, dropping a single trailing newline if there
 * is one.
 *
 * Returns 0 on success, or -1 if the file cannot be opened or read, or
 * if the data read leaves no room for the terminating NUL.
 */
int load_sys(char *path, char *buf, int len)
{
        int nread;
        int fd;

        fd = open(path, O_RDONLY);
        if (fd < 0)
                return -1;

        nread = read(fd, buf, len);
        close(fd);

        /* a read that fills the whole buffer leaves no space for the NUL */
        if (nread < 0 || nread >= len)
                return -1;

        buf[nread] = '\0';
        if (nread > 0 && buf[nread - 1] == '\n')
                buf[nread - 1] = '\0';
        return 0;
}
47
48 void sysfs_free(struct mdinfo *sra)
49 {
50         while (sra) {
51                 struct mdinfo *sra2 = sra->next;
52                 while (sra->devs) {
53                         struct mdinfo *d = sra->devs;
54                         sra->devs = d->next;
55                         free(d->bb.entries);
56                         free(d);
57                 }
58                 free(sra->bb.entries);
59                 free(sra);
60                 sra = sra2;
61         }
62 }
63
64 int sysfs_open(char *devnm, char *devname, char *attr)
65 {
66         char fname[MAX_SYSFS_PATH_LEN];
67         int fd;
68
69         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", devnm);
70         if (devname) {
71                 strncat(fname, devname, MAX_SYSFS_PATH_LEN - strlen(fname));
72                 strncat(fname, "/", MAX_SYSFS_PATH_LEN - strlen(fname));
73         }
74         strncat(fname, attr, MAX_SYSFS_PATH_LEN - strlen(fname));
75         fd = open(fname, O_RDWR);
76         if (fd < 0 && errno == EACCES)
77                 fd = open(fname, O_RDONLY);
78         return fd;
79 }
80
81 void sysfs_init_dev(struct mdinfo *mdi, dev_t devid)
82 {
83         snprintf(mdi->sys_name,
84                  sizeof(mdi->sys_name), "dev-%s", devid2kname(devid));
85 }
86
87 int sysfs_init(struct mdinfo *mdi, int fd, char *devnm)
88 {
89         struct stat stb;
90         char fname[MAX_SYSFS_PATH_LEN];
91         int retval = -ENODEV;
92
93         mdi->sys_name[0] = 0;
94         if (fd >= 0)
95                 devnm = fd2devnm(fd);
96
97         if (devnm == NULL)
98                 goto out;
99
100         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md", devnm);
101
102         if (stat(fname, &stb))
103                 goto out;
104         if (!S_ISDIR(stb.st_mode))
105                 goto out;
106         strcpy(mdi->sys_name, devnm);
107
108         retval = 0;
109 out:
110         return retval;
111 }
112
113 struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
114 {
115         char fname[PATH_MAX];
116         char buf[PATH_MAX];
117         char *base;
118         char *dbase;
119         struct mdinfo *sra;
120         struct mdinfo *dev, **devp;
121         DIR *dir = NULL;
122         struct dirent *de;
123
124         sra = xcalloc(1, sizeof(*sra));
125         if (sysfs_init(sra, fd, devnm)) {
126                 free(sra);
127                 return NULL;
128         }
129
130         sprintf(fname, "/sys/block/%s/md/", sra->sys_name);
131         base = fname + strlen(fname);
132
133         sra->devs = NULL;
134         if (options & GET_VERSION) {
135                 strcpy(base, "metadata_version");
136                 if (load_sys(fname, buf, sizeof(buf)))
137                         goto abort;
138                 if (strncmp(buf, "none", 4) == 0) {
139                         sra->array.major_version =
140                                 sra->array.minor_version = -1;
141                         strcpy(sra->text_version, "");
142                 } else if (strncmp(buf, "external:", 9) == 0) {
143                         sra->array.major_version = -1;
144                         sra->array.minor_version = -2;
145                         strcpy(sra->text_version, buf+9);
146                 } else {
147                         sscanf(buf, "%d.%d",
148                                &sra->array.major_version,
149                                &sra->array.minor_version);
150                         strcpy(sra->text_version, buf);
151                 }
152         }
153         if (options & GET_LEVEL) {
154                 strcpy(base, "level");
155                 if (load_sys(fname, buf, sizeof(buf)))
156                         goto abort;
157                 sra->array.level = map_name(pers, buf);
158         }
159         if (options & GET_LAYOUT) {
160                 strcpy(base, "layout");
161                 if (load_sys(fname, buf, sizeof(buf)))
162                         goto abort;
163                 sra->array.layout = strtoul(buf, NULL, 0);
164         }
165         if (options & (GET_DISKS|GET_STATE)) {
166                 strcpy(base, "raid_disks");
167                 if (load_sys(fname, buf, sizeof(buf)))
168                         goto abort;
169                 sra->array.raid_disks = strtoul(buf, NULL, 0);
170         }
171         if (options & GET_COMPONENT) {
172                 strcpy(base, "component_size");
173                 if (load_sys(fname, buf, sizeof(buf)))
174                         goto abort;
175                 sra->component_size = strtoull(buf, NULL, 0);
176                 /* sysfs reports "K", but we want sectors */
177                 sra->component_size *= 2;
178         }
179         if (options & GET_CHUNK) {
180                 strcpy(base, "chunk_size");
181                 if (load_sys(fname, buf, sizeof(buf)))
182                         goto abort;
183                 sra->array.chunk_size = strtoul(buf, NULL, 0);
184         }
185         if (options & GET_CACHE) {
186                 strcpy(base, "stripe_cache_size");
187                 if (load_sys(fname, buf, sizeof(buf)))
188                         /* Probably level doesn't support it */
189                         sra->cache_size = 0;
190                 else
191                         sra->cache_size = strtoul(buf, NULL, 0);
192         }
193         if (options & GET_MISMATCH) {
194                 strcpy(base, "mismatch_cnt");
195                 if (load_sys(fname, buf, sizeof(buf)))
196                         goto abort;
197                 sra->mismatch_cnt = strtoul(buf, NULL, 0);
198         }
199         if (options & GET_SAFEMODE) {
200                 int scale = 1;
201                 int dot = 0;
202                 unsigned i;
203                 unsigned long msec;
204                 size_t len;
205
206                 strcpy(base, "safe_mode_delay");
207                 if (load_sys(fname, buf, sizeof(buf)))
208                         goto abort;
209
210                 /* remove a period, and count digits after it */
211                 len = strlen(buf);
212                 for (i = 0; i < len; i++) {
213                         if (dot) {
214                                 if (isdigit(buf[i])) {
215                                         buf[i-1] = buf[i];
216                                         scale *= 10;
217                                 }
218                                 buf[i] = 0;
219                         } else if (buf[i] == '.') {
220                                 dot=1;
221                                 buf[i] = 0;
222                         }
223                 }
224                 msec = strtoul(buf, NULL, 10);
225                 msec = (msec * 1000) / scale;
226                 sra->safe_mode_delay = msec;
227         }
228         if (options & GET_BITMAP_LOCATION) {
229                 strcpy(base, "bitmap/location");
230                 if (load_sys(fname, buf, sizeof(buf)))
231                         goto abort;
232                 if (strncmp(buf, "file", 4) == 0)
233                         sra->bitmap_offset = 1;
234                 else if (strncmp(buf, "none", 4) == 0)
235                         sra->bitmap_offset = 0;
236                 else if (buf[0] == '+')
237                         sra->bitmap_offset = strtol(buf+1, NULL, 10);
238                 else
239                         goto abort;
240         }
241
242         if (options & GET_ARRAY_STATE) {
243                 strcpy(base, "array_state");
244                 if (load_sys(fname, buf, sizeof(buf)))
245                         goto abort;
246                 sra->array_state = map_name(sysfs_array_states, buf);
247         }
248
249         if (options & GET_CONSISTENCY_POLICY) {
250                 strcpy(base, "consistency_policy");
251                 if (load_sys(fname, buf, sizeof(buf)))
252                         sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
253                 else
254                         sra->consistency_policy = map_name(consistency_policies,
255                                                            buf);
256         }
257
258         if (! (options & GET_DEVS))
259                 return sra;
260
261         /* Get all the devices as well */
262         *base = 0;
263         dir = opendir(fname);
264         if (!dir)
265                 goto abort;
266         sra->array.spare_disks = 0;
267         sra->array.active_disks = 0;
268         sra->array.failed_disks = 0;
269         sra->array.working_disks = 0;
270
271         devp = &sra->devs;
272         sra->devs = NULL;
273         while ((de = readdir(dir)) != NULL) {
274                 char *ep;
275                 if (de->d_ino == 0 ||
276                     strncmp(de->d_name, "dev-", 4) != 0)
277                         continue;
278                 strcpy(base, de->d_name);
279                 dbase = base + strlen(base);
280                 *dbase++ = '/';
281
282                 dev = xcalloc(1, sizeof(*dev));
283
284                 /* Always get slot, major, minor */
285                 strcpy(dbase, "slot");
286                 if (load_sys(fname, buf, sizeof(buf))) {
287                         /* hmm... unable to read 'slot' maybe the device
288                          * is going away?
289                          */
290                         strcpy(dbase, "block");
291                         if (readlink(fname, buf, sizeof(buf)) < 0 &&
292                             errno != ENAMETOOLONG) {
293                                 /* ...yup device is gone */
294                                 free(dev);
295                                 continue;
296                         } else {
297                                 /* slot is unreadable but 'block' link
298                                  * still intact... something bad is happening
299                                  * so abort
300                                  */
301                                 free(dev);
302                                 goto abort;
303                         }
304
305                 }
306                 strcpy(dev->sys_name, de->d_name);
307                 dev->disk.raid_disk = strtoul(buf, &ep, 10);
308                 if (*ep) dev->disk.raid_disk = -1;
309
310                 strcpy(dbase, "block/dev");
311                 if (load_sys(fname, buf, sizeof(buf))) {
312                         /* assume this is a stale reference to a hot
313                          * removed device
314                          */
315                         free(dev);
316                         continue;
317                 }
318                 sra->array.nr_disks++;
319                 sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
320
321                 /* special case check for block devices that can go 'offline' */
322                 strcpy(dbase, "block/device/state");
323                 if (load_sys(fname, buf, sizeof(buf)) == 0 &&
324                     strncmp(buf, "offline", 7) == 0) {
325                         free(dev);
326                         continue;
327                 }
328
329                 /* finally add this disk to the array */
330                 *devp = dev;
331                 devp = & dev->next;
332                 dev->next = NULL;
333
334                 if (options & GET_OFFSET) {
335                         strcpy(dbase, "offset");
336                         if (load_sys(fname, buf, sizeof(buf)))
337                                 goto abort;
338                         dev->data_offset = strtoull(buf, NULL, 0);
339                         strcpy(dbase, "new_offset");
340                         if (load_sys(fname, buf, sizeof(buf)) == 0)
341                                 dev->new_data_offset = strtoull(buf, NULL, 0);
342                         else
343                                 dev->new_data_offset = dev->data_offset;
344                 }
345                 if (options & GET_SIZE) {
346                         strcpy(dbase, "size");
347                         if (load_sys(fname, buf, sizeof(buf)))
348                                 goto abort;
349                         dev->component_size = strtoull(buf, NULL, 0) * 2;
350                 }
351                 if (options & GET_STATE) {
352                         dev->disk.state = 0;
353                         strcpy(dbase, "state");
354                         if (load_sys(fname, buf, sizeof(buf)))
355                                 goto abort;
356                         if (strstr(buf, "faulty"))
357                                 dev->disk.state |= (1<<MD_DISK_FAULTY);
358                         else {
359                                 sra->array.working_disks++;
360                                 if (strstr(buf, "in_sync")) {
361                                         dev->disk.state |= (1<<MD_DISK_SYNC);
362                                         sra->array.active_disks++;
363                                 }
364                                 if (dev->disk.state == 0)
365                                         sra->array.spare_disks++;
366                         }
367                 }
368                 if (options & GET_ERROR) {
369                         strcpy(buf, "errors");
370                         if (load_sys(fname, buf, sizeof(buf)))
371                                 goto abort;
372                         dev->errors = strtoul(buf, NULL, 0);
373                 }
374         }
375
376         if ((options & GET_STATE) && sra->array.raid_disks)
377                 sra->array.failed_disks = sra->array.raid_disks -
378                         sra->array.active_disks - sra->array.spare_disks;
379
380         closedir(dir);
381         return sra;
382
383  abort:
384         if (dir)
385                 closedir(dir);
386         sysfs_free(sra);
387         return NULL;
388 }
389
/*
 * Decide whether 'attr', as read from a sysfs file, matches 'str'.
 *
 * It matches when 'attr' begins with all of 'str' and what follows is
 * the end of the string, a comma or a newline.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int sysfs_attr_match(const char *attr, const char *str)
{
        size_t len = strlen(str);
        char tail;

        /* also fails when attr is shorter than str */
        if (strncmp(attr, str, len) != 0)
                return 0;

        tail = attr[len];
        return tail == '\0' || tail == ',' || tail == '\n';
}
405
/*
 * Return the index of the first entry in the NULL-terminated 'list'
 * that 'word' matches according to sysfs_attr_match().  When nothing
 * matches, the index of the terminating NULL is returned.
 */
int sysfs_match_word(const char *word, char **list)
{
        int idx = 0;

        while (list[idx] != NULL && !sysfs_attr_match(word, list[idx]))
                idx++;
        return idx;
}
414
415 unsigned long long get_component_size(int fd)
416 {
417         /* Find out the component size of the array.
418          * We cannot trust GET_ARRAY_INFO ioctl as it's
419          * size field is only 32bits.
420          * So look in /sys/block/mdXXX/md/component_size
421          *
422          * This returns in units of sectors.
423          */
424         struct stat stb;
425         char fname[MAX_SYSFS_PATH_LEN];
426         int n;
427         if (fstat(fd, &stb))
428                 return 0;
429         snprintf(fname, MAX_SYSFS_PATH_LEN,
430                  "/sys/block/%s/md/component_size", stat2devnm(&stb));
431         fd = open(fname, O_RDONLY);
432         if (fd < 0)
433                 return 0;
434         n = read(fd, fname, sizeof(fname));
435         close(fd);
436         if (n < 0 || n == sizeof(fname))
437                 return 0;
438         fname[n] = 0;
439         return strtoull(fname, NULL, 10) * 2;
440 }
441
442 int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
443                   char *name, char *val)
444 {
445         char fname[MAX_SYSFS_PATH_LEN];
446         unsigned int n;
447         int fd;
448
449         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
450                 sra->sys_name, dev?dev->sys_name:"", name);
451         fd = open(fname, O_WRONLY);
452         if (fd < 0)
453                 return -1;
454         n = write(fd, val, strlen(val));
455         close(fd);
456         if (n != strlen(val)) {
457                 dprintf("failed to write '%s' to '%s' (%s)\n",
458                         val, fname, strerror(errno));
459                 return -1;
460         }
461         return 0;
462 }
463
/*
 * Write the unsigned number 'val', formatted in decimal, to a sysfs
 * attribute by way of sysfs_set_str(), whose result is returned.
 */
int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
                  char *name, unsigned long long val)
{
        char text[50];

        snprintf(text, sizeof(text), "%llu", val);
        return sysfs_set_str(sra, dev, name, text);
}
471
/*
 * Write the signed number 'val', formatted in decimal, to a sysfs
 * attribute by way of sysfs_set_str(), whose result is returned.
 */
int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev,
                         char *name, long long val)
{
        char text[50];

        snprintf(text, sizeof(text), "%lli", val);
        return sysfs_set_str(sra, dev, name, text);
}
479
480 int sysfs_uevent(struct mdinfo *sra, char *event)
481 {
482         char fname[MAX_SYSFS_PATH_LEN];
483         int n;
484         int fd;
485
486         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/uevent",
487                 sra->sys_name);
488         fd = open(fname, O_WRONLY);
489         if (fd < 0)
490                 return -1;
491         n = write(fd, event, strlen(event));
492         close(fd);
493         if (n != (int)strlen(event)) {
494                 dprintf("failed to write '%s' to '%s' (%s)\n",
495                         event, fname, strerror(errno));
496                 return -1;
497         }
498         return 0;
499 }
500
501 int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name)
502 {
503         char fname[MAX_SYSFS_PATH_LEN];
504         struct stat st;
505
506         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
507                 sra->sys_name, dev?dev->sys_name:"", name);
508
509         return stat(fname, &st) == 0;
510 }
511
512 int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
513                        char *name)
514 {
515         char fname[MAX_SYSFS_PATH_LEN];
516         int fd;
517
518         snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
519                 sra->sys_name, dev?dev->sys_name:"", name);
520         fd = open(fname, O_RDWR);
521         if (fd < 0)
522                 fd = open(fname, O_RDONLY);
523         return fd;
524 }
525
/*
 * Re-read the attribute open on 'fd' from its start and parse it as an
 * unsigned number (base chosen by prefix, as with strtoull base 0).
 *
 * Returns 0 with the number stored in *val, -2 if nothing could be read
 * or the data filled the whole buffer, or -1 if the text does not start
 * with a number or the number is followed by something other than the
 * end of the data, a newline or a space.
 */
int sysfs_fd_get_ll(int fd, unsigned long long *val)
{
        char text[50];
        char *end;
        int nread;

        lseek(fd, 0, SEEK_SET);
        nread = read(fd, text, sizeof(text));
        if (nread <= 0 || nread == (int)sizeof(text))
                return -2;
        text[nread] = '\0';

        *val = strtoull(text, &end, 0);
        if (end == text)
                return -1;

        switch (*end) {
        case '\0':
        case '\n':
        case ' ':
                return 0;
        default:
                return -1;
        }
}
542
/*
 * Open the named sysfs attribute with sysfs_get_fd(), parse one number
 * from it with sysfs_fd_get_ll() and close it again.
 *
 * Returns -1 if the attribute cannot be opened, otherwise whatever
 * sysfs_fd_get_ll() returned.
 */
int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
                       char *name, unsigned long long *val)
{
        int fd = sysfs_get_fd(sra, dev, name);
        int rv;

        if (fd < 0)
                return -1;

        rv = sysfs_fd_get_ll(fd, val);
        close(fd);
        return rv;
}
556
/*
 * Re-read the attribute open on 'fd' from its start and parse up to two
 * numbers from it.  The expected layouts are
 *  NNN (NNN)
 * or
 *  NNN / NNN
 *
 * Returns 2 when both numbers were found, 1 when only the first was (in
 * which case *v2 is set equal to *v1), -1 when the first number is
 * missing or badly terminated, and -2 when nothing could be read or the
 * data filled the whole buffer.
 */
int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2)
{
        char text[80];
        char *cur, *end;
        int nread;

        lseek(fd, 0, SEEK_SET);
        nread = read(fd, text, sizeof(text));
        if (nread <= 0 || nread == (int)sizeof(text))
                return -2;
        text[nread] = '\0';

        *v1 = strtoull(text, &cur, 0);
        if (cur == text ||
            !(*cur == '\0' || *cur == '\n' || *cur == ' '))
                return -1;

        /* step over whatever separates the two numbers */
        cur += strspn(cur, " /(");

        *v2 = strtoull(cur, &end, 0);
        if (end != cur &&
            (*end == '\0' || *end == '\n' || *end == ' ' || *end == ')'))
                return 2;

        /* no usable second number: report the first one twice */
        *v2 = *v1;
        return 1;
}
585
/*
 * Open the named sysfs attribute with sysfs_get_fd(), parse up to two
 * numbers from it with sysfs_fd_get_two() and close it again.
 *
 * Returns -1 if the attribute cannot be opened, otherwise whatever
 * sysfs_fd_get_two() returned.
 */
int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
                  char *name, unsigned long long *v1, unsigned long long *v2)
{
        int fd = sysfs_get_fd(sra, dev, name);
        int rv;

        if (fd < 0)
                return -1;

        rv = sysfs_fd_get_two(fd, v1, v2);
        close(fd);
        return rv;
}
599
/*
 * Re-read the attribute open on 'fd' from its start into 'val'
 * (capacity 'size' bytes) and NUL-terminate it.  A trailing newline is
 * kept.
 *
 * Returns the number of bytes read, or -1 if nothing could be read or
 * the data filled the whole buffer.
 */
int sysfs_fd_get_str(int fd, char *val, int size)
{
        int nread;

        lseek(fd, 0, SEEK_SET);
        nread = read(fd, val, size);
        if (nread > 0 && nread != size) {
                val[nread] = '\0';
                return nread;
        }
        return -1;
}
611
/*
 * Open the named sysfs attribute with sysfs_get_fd(), read it into
 * 'val' (capacity 'size') with sysfs_fd_get_str() and close it again.
 *
 * Returns -1 if the attribute cannot be opened, otherwise whatever
 * sysfs_fd_get_str() returned.
 */
int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
                       char *name, char *val, int size)
{
        int fd = sysfs_get_fd(sra, dev, name);
        int rv;

        if (fd < 0)
                return -1;

        rv = sysfs_fd_get_str(fd, val, size);
        close(fd);
        return rv;
}
625
/*
 * Write 'ms' (a delay in milliseconds) to the array's safe_mode_delay
 * attribute, formatted as seconds with three decimal places.
 *
 * Returns the result of sysfs_set_str().
 */
int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
{
        unsigned long sec = ms / 1000;
        unsigned long msec = ms % 1000;
        char delay[30];

        /* %lu matches the unsigned long arguments */
        snprintf(delay, sizeof(delay), "%lu.%03lu\n", sec, msec);
        /*                       this '\n' ^ needed for kernels older than 2.6.28 */
        return sysfs_set_str(sra, NULL, "safe_mode_delay", delay);
}
639
640 int sysfs_set_array(struct mdinfo *info, int vers)
641 {
642         int rv = 0;
643         char ver[100];
644         int raid_disks = info->array.raid_disks;
645
646         ver[0] = 0;
647         if (info->array.major_version == -1 &&
648             info->array.minor_version == -2) {
649                 char buf[1024];
650
651                 strcat(strcpy(ver, "external:"), info->text_version);
652
653                 /* meta version might already be set if we are setting
654                  * new geometry for a reshape.  In that case we don't
655                  * want to over-write the 'readonly' flag that is
656                  * stored in the metadata version.  So read the current
657                  * version first, and preserve the flag
658                  */
659                 if (sysfs_get_str(info, NULL, "metadata_version",
660                                   buf, 1024) > 0)
661                         if (strlen(buf) >= 9 && buf[9] == '-')
662                                 ver[9] = '-';
663
664                 if ((vers % 100) < 2 ||
665                     sysfs_set_str(info, NULL, "metadata_version",
666                                   ver) < 0) {
667                         pr_err("This kernel does not support external metadata.\n");
668                         return 1;
669                 }
670         }
671         if (info->array.level < 0)
672                 return 0; /* FIXME */
673         rv |= sysfs_set_str(info, NULL, "level",
674                             map_num(pers, info->array.level));
675         if (info->reshape_active && info->delta_disks != UnSet)
676                 raid_disks -= info->delta_disks;
677         rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
678         rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size);
679         rv |= sysfs_set_num(info, NULL, "layout", info->array.layout);
680         rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
681         if (info->custom_array_size) {
682                 int rc;
683
684                 rc = sysfs_set_num(info, NULL, "array_size",
685                                    info->custom_array_size/2);
686                 if (rc && errno == ENOENT) {
687                         pr_err("This kernel does not have the md/array_size attribute, the array may be larger than expected\n");
688                         rc = 0;
689                 }
690                 rv |= rc;
691         }
692
693         if (info->array.level > 0)
694                 rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start);
695
696         if (info->reshape_active) {
697                 rv |= sysfs_set_num(info, NULL, "reshape_position",
698                                     info->reshape_progress);
699                 rv |= sysfs_set_num(info, NULL, "chunk_size", info->new_chunk);
700                 rv |= sysfs_set_num(info, NULL, "layout", info->new_layout);
701                 rv |= sysfs_set_num(info, NULL, "raid_disks",
702                                     info->array.raid_disks);
703                 /* We don't set 'new_level' here.  That can only happen
704                  * once the reshape completes.
705                  */
706         }
707
708         if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
709                 if (sysfs_set_str(info, NULL, "consistency_policy",
710                                   map_num(consistency_policies,
711                                           info->consistency_policy))) {
712                         pr_err("This kernel does not support PPL. Falling back to consistency-policy=resync.\n");
713                         info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
714                 }
715         }
716
717         return rv;
718 }
719
720 int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
721 {
722         char dv[PATH_MAX];
723         char nm[PATH_MAX];
724         char *dname;
725         int rv;
726         int i;
727
728         sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
729         rv = sysfs_set_str(sra, NULL, "new_dev", dv);
730         if (rv)
731                 return rv;
732
733         memset(nm, 0, sizeof(nm));
734         dname = devid2kname(makedev(sd->disk.major, sd->disk.minor));
735         strcpy(sd->sys_name, "dev-");
736         strcpy(sd->sys_name+4, dname);
737
738         /* test write to see if 'recovery_start' is available */
739         if (resume && sd->recovery_start < MaxSector &&
740             sysfs_set_num(sra, sd, "recovery_start", 0)) {
741                 sysfs_set_str(sra, sd, "state", "remove");
742                 return -1;
743         }
744
745         rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
746         rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
747         if (sra->array.level != LEVEL_CONTAINER) {
748                 if (sra->consistency_policy == CONSISTENCY_POLICY_PPL) {
749                         rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
750                         rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
751                 }
752                 if (sd->recovery_start == MaxSector)
753                         /* This can correctly fail if array isn't started,
754                          * yet, so just ignore status for now.
755                          */
756                         sysfs_set_str(sra, sd, "state", "insync");
757                 if (sd->disk.raid_disk >= 0)
758                         rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
759                 if (resume)
760                         sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
761         }
762         if (sd->bb.supported) {
763                 if (sysfs_set_str(sra, sd, "state", "external_bbl")) {
764                         /*
765                          * backward compatibility - if kernel doesn't support
766                          * bad blocks for external metadata, let it continue
767                          * as long as there are none known so far
768                          */
769                         if (sd->bb.count) {
770                                 pr_err("The kernel has no support for bad blocks in external metadata\n");
771                                 return -1;
772                         }
773                 }
774
775                 for (i = 0; i < sd->bb.count; i++) {
776                         char s[30];
777                         const struct md_bb_entry *entry = &sd->bb.entries[i];
778
779                         snprintf(s, sizeof(s) - 1, "%llu %d\n", entry->sector,
780                                  entry->length);
781                         rv |= sysfs_set_str(sra, sd, "bad_blocks", s);
782                 }
783         }
784         return rv;
785 }
786
#if 0
/*
 * From an open block device, try to find and open its corresponding
 * scsi_generic interface.
 *
 * The "scsi_generic:*" entry under /sys/dev/block/<maj>:<min>/device
 * names the sg device; its "dev" file gives a major:minor pair, for
 * which a temporary character device node is created, opened and
 * unlinked again.
 *
 * Returns the open descriptor, or a negative value on any failure.
 */
int sysfs_disk_to_sg(int fd)
{
        struct stat st;
        char path[256];
        char sg_path[256];
        char sg_name[256];
        char sg_major_minor[10];
        char *c;
        DIR *dir;
        struct dirent *de;
        int sg_major, sg_minor, rv;

        if (fstat(fd, &st))
                return -1;

        snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
                 major(st.st_rdev), minor(st.st_rdev));

        dir = opendir(path);
        if (!dir)
                return -1;

        sg_name[0] = '\0';
        while ((de = readdir(dir)) != NULL) {
                if (strncmp("scsi_generic:", de->d_name,
                            strlen("scsi_generic:")) == 0) {
                        /* 'de' is not valid after closedir(), so the
                         * name has to be copied out first
                         */
                        snprintf(sg_name, sizeof(sg_name), "%s", de->d_name);
                        break;
                }
        }
        closedir(dir);

        if (!sg_name[0])
                return -1;

        snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, sg_name);
        fd = open(sg_path, O_RDONLY);
        if (fd < 0)
                return fd;

        rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
        close(fd);
        /* an empty read would make the index below negative */
        if (rv <= 0 || rv == sizeof(sg_major_minor))
                return -1;
        sg_major_minor[rv - 1] = '\0';

        c = strchr(sg_major_minor, ':');
        if (!c)
                return -1;
        *c = '\0';
        c++;
        sg_major = strtol(sg_major_minor, NULL, 10);
        sg_minor = strtol(c, NULL, 10);
        snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
                 (int) getpid(), sg_major, sg_minor);
        if (mknod(path, S_IFCHR|0600, makedev(sg_major, sg_minor))==0) {
                        fd = open(path, O_RDONLY);
                        unlink(path);
                        return fd;
        }

        return -1;
}
#endif
852
853 int sysfs_disk_to_scsi_id(int fd, __u32 *id)
854 {
855         /* from an open block device, try to retrieve it scsi_id */
856         struct stat st;
857         char path[256];
858         DIR *dir;
859         struct dirent *de;
860         int host, bus, target, lun;
861
862         if (fstat(fd, &st))
863                 return 1;
864
865         snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device/scsi_device",
866                  major(st.st_rdev), minor(st.st_rdev));
867
868         dir = opendir(path);
869         if (!dir)
870                 return 1;
871
872         for (de = readdir(dir); de; de = readdir(dir)) {
873                 int count;
874
875                 if (de->d_type != DT_DIR)
876                         continue;
877
878                 count = sscanf(de->d_name, "%d:%d:%d:%d", &host, &bus, &target, &lun);
879                 if (count == 4)
880                         break;
881         }
882         closedir(dir);
883
884         if (!de)
885                 return 1;
886
887         *id = (host << 24) | (bus << 16) | (target << 8) | (lun << 0);
888         return 0;
889 }
890
int sysfs_unique_holder(char *devnm, long rdev)
{
	/* Check that devnm is a holder of rdev,
	 * and is the only holder.
	 * we should be locked against races by
	 * an O_EXCL on devnm
	 * Return values:
	 *  0 - not unique, not even a holder
	 *  1 - unique, this is the only holder.
	 *  2/3 - not unique, there is another holder
	 * -1 - error, cannot find the holders
	 *
	 * Each entry of /sys/dev/block/<major>:<minor>/holders is read as
	 * a symlink; the last path component of its target is compared
	 * with devnm.  Entries that cannot be read as a link, or whose
	 * target contains no '/', are ignored.
	 */
	DIR *dir;
	struct dirent *de;
	char dirname[100];
	int ret = 0;

	snprintf(dirname, sizeof(dirname), "/sys/dev/block/%d:%d/holders",
		 major(rdev), minor(rdev));
	dir = opendir(dirname);
	if (!dir)
		return -1;
	while ((de = readdir(dir)) != NULL) {
		/* Sized to hold dirname + "/" + any d_name, so a long
		 * entry name can never overflow or be truncated.
		 */
		char entry[sizeof(dirname) + sizeof(de->d_name) + 1];
		char buf[100];
		char *sl;
		int n;

		if (de->d_ino == 0)
			continue;
		if (de->d_name[0] == '.')
			continue;
		snprintf(entry, sizeof(entry), "%s/%s", dirname, de->d_name);
		/* readlink() does not terminate; leave room to do it */
		n = readlink(entry, buf, sizeof(buf)-1);
		if (n <= 0)
			continue;
		buf[n] = 0;
		sl = strrchr(buf, '/');
		if (!sl)
			continue;
		sl++;

		if (strcmp(devnm, sl) == 0)
			ret |= 1;
		else
			ret |= 2;
	}
	closedir(dir);
	return ret;
}
942
int sysfs_freeze_array(struct mdinfo *sra)
{
	/* Try to freeze resync/rebuild on this array/container by
	 * writing "frozen" to its 'sync_action' attribute.
	 * Return values:
	 *  -1 - sync_action is neither "idle" nor "recover"
	 *       (the array is busy)
	 *   0 - nothing was changed: sync_action could not be read,
	 *       it was already "frozen", or writing "frozen" failed
	 *   1 - the array is now frozen, or it has no sync_action
	 *       attribute at all (which counts as frozen)
	 */
	char action[20];
	int can_freeze;

	if (!sysfs_attribute_available(sra, NULL, "sync_action"))
		return 1; /* no sync_action == frozen */

	if (sysfs_get_str(sra, NULL, "sync_action", action,
			  (int)sizeof(action)) <= 0)
		return 0;

	/* values are compared with their trailing newline included */
	if (!strcmp(action, "frozen\n"))
		return 0; /* Already frozen */

	can_freeze = !strcmp(action, "idle\n") || !strcmp(action, "recover\n");
	if (!can_freeze)
		return -1;

	return sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0 ? 0 : 1;
}
965
int sysfs_wait(int fd, int *msec)
{
	/* Wait up to '*msec' milliseconds for fd to have an exception
	 * condition.
	 * if msec == NULL, wait indefinitely.
	 * if *msec is negative, return 0 at once without waiting and
	 * leave *msec unchanged.
	 * Otherwise *msec is reduced by the elapsed wall-clock time plus
	 * one millisecond, so it can end up negative once the time has
	 * been used up.
	 * Returns the result of select(), or 0 when no wait was done.
	 */
	fd_set fds;
	int n;

	FD_ZERO(&fds);
	FD_SET(fd, &fds);
	if (msec == NULL)
		n = select(fd+1, NULL, NULL, &fds, NULL);
	else if (*msec < 0)
		n = 0;
	else {
		struct timeval start, end, tv;

		gettimeofday(&start, NULL);
		/* Split into seconds and the sub-second remainder so that
		 * e.g. 1500ms is not cut down to a 1 second wait.
		 */
		tv.tv_sec = *msec / 1000;
		tv.tv_usec = (*msec % 1000) * 1000;
		n = select(fd+1, NULL, NULL, &fds, &tv);
		gettimeofday(&end, NULL);
		end.tv_sec -= start.tv_sec;
		/* the extra 1 guarantees *msec shrinks on every call */
		*msec -= (end.tv_sec * 1000 + end.tv_usec/1000
			  - start.tv_usec/1000) + 1;
	}
	return n;
}