sysfs: avoid possible data corruption in sys_load.
[thirdparty/mdadm.git] / sysfs.c
1 /*
2  * sysfs - extract md related information from sysfs.  Part of:
3  * mdadm - manage Linux "md" devices aka RAID arrays.
4  *
5  * Copyright (C) 2006 Neil Brown <neilb@suse.de>
6  *
7  *
8  *    This program is free software; you can redistribute it and/or modify
9  *    it under the terms of the GNU General Public License as published by
10  *    the Free Software Foundation; either version 2 of the License, or
11  *    (at your option) any later version.
12  *
13  *    This program is distributed in the hope that it will be useful,
14  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *    GNU General Public License for more details.
17  *
18  *    You should have received a copy of the GNU General Public License
19  *    along with this program; if not, write to the Free Software
20  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21  *
22  *    Author: Neil Brown
23  *    Email: <neilb@suse.de>
24  */
25
26 #include        "mdadm.h"
27 #include        <dirent.h>
28
/*
 * Read the file 'path' into 'buf' as a NUL-terminated string and strip
 * a single trailing newline if there is one.
 *
 * At most 1024 bytes are read, so 'buf' must be at least 1024 bytes long.
 *
 * Returns 0 on success.  Returns -1 if the file cannot be opened, if the
 * read fails, or if the read fills all 1024 bytes (which would leave no
 * room for the terminator).
 */
int load_sys(char *path, char *buf)
{
        int len;
        int sysfd;

        sysfd = open(path, O_RDONLY);
        if (sysfd < 0)
                return -1;

        len = read(sysfd, buf, 1024);
        close(sysfd);

        if (len < 0)
                return -1;
        if (len >= 1024)
                return -1;

        buf[len] = 0;
        if (len > 0 && buf[len-1] == '\n')
                buf[len-1] = 0;
        return 0;
}
44
45 void sysfs_free(struct mdinfo *sra)
46 {
47         while (sra) {
48                 struct mdinfo *sra2 = sra->next;
49                 while (sra->devs) {
50                         struct mdinfo *d = sra->devs;
51                         sra->devs = d->next;
52                         free(d);
53                 }
54                 free(sra);
55                 sra = sra2;
56         }
57 }
58
/*
 * Open the sysfs attribute
 *     /sys/block/<array>/md/<attr>              (devname == NULL)
 *     /sys/block/<array>/md/<devname>/<attr>    (devname != NULL)
 * where <array> is the name devnum2devname() gives for 'devnum'.
 *
 * The file is opened read-write; if that fails with EACCES it is
 * opened read-only instead.
 *
 * Returns the open file descriptor, or a negative value on failure.
 * If the path does not fit in the local buffer, errno is set to
 * ENAMETOOLONG and -1 is returned.
 *
 * NOTE(review): if devnum2devname() returns allocated memory it is not
 * released here - confirm against its definition.
 */
int sysfs_open(int devnum, char *devname, char *attr)
{
        char fname[256];
        int len;
        int fd;

        /* Build the whole path with one bounded call so that a long
         * devname or attr can never write past the end of fname.
         */
        if (devname)
                len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
                               devnum2devname(devnum), devname, attr);
        else
                len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s",
                               devnum2devname(devnum), attr);
        if (len < 0 || len >= (int)sizeof(fname)) {
                errno = ENAMETOOLONG;
                return -1;
        }

        fd = open(fname, O_RDWR);
        if (fd < 0 && errno == EACCES)
                fd = open(fname, O_RDONLY);
        return fd;
}
75
76 struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
77 {
78         /* Longest possible name in sysfs, mounted at /sys, is
79          *  /sys/block/md_dXXX/md/dev-XXXXX/block/dev
80          *  /sys/block/md_dXXX/md/metadata_version
81          * which is about 41 characters.  50 should do for now
82          */
83         char fname[50];
84         char buf[1024];
85         char *base;
86         char *dbase;
87         struct mdinfo *sra;
88         struct mdinfo *dev;
89         DIR *dir = NULL;
90         struct dirent *de;
91
92         sra = malloc(sizeof(*sra));
93         if (sra == NULL)
94                 return sra;
95         sra->next = NULL;
96
97         if (fd >= 0) {
98                 struct stat stb;
99                 mdu_version_t vers;
100                 if (fstat(fd, &stb)) return NULL;
101                 if (ioctl(fd, RAID_VERSION, &vers) != 0)
102                         return NULL;
103                 if (major(stb.st_rdev)==9)
104                         sprintf(sra->sys_name, "md%d", (int)minor(stb.st_rdev));
105                 else
106                         sprintf(sra->sys_name, "md_d%d",
107                                 (int)minor(stb.st_rdev)>>MdpMinorShift);
108         } else {
109                 if (devnum >= 0)
110                         sprintf(sra->sys_name, "md%d", devnum);
111                 else
112                         sprintf(sra->sys_name, "md_d%d",
113                                 -1-devnum);
114         }
115         sprintf(fname, "/sys/block/%s/md/", sra->sys_name);
116         base = fname + strlen(fname);
117
118         sra->devs = NULL;
119         if (options & GET_VERSION) {
120                 strcpy(base, "metadata_version");
121                 if (load_sys(fname, buf))
122                         goto abort;
123                 if (strncmp(buf, "none", 4) == 0) {
124                         sra->array.major_version =
125                                 sra->array.minor_version = -1;
126                         strcpy(sra->text_version, "");
127                 } else if (strncmp(buf, "external:", 9) == 0) {
128                         sra->array.major_version = -1;
129                         sra->array.minor_version = -2;
130                         strcpy(sra->text_version, buf+9);
131                 } else {
132                         sscanf(buf, "%d.%d",
133                                &sra->array.major_version,
134                                &sra->array.minor_version);
135                         strcpy(sra->text_version, buf);
136                 }
137         }
138         if (options & GET_LEVEL) {
139                 strcpy(base, "level");
140                 if (load_sys(fname, buf))
141                         goto abort;
142                 sra->array.level = map_name(pers, buf);
143         }
144         if (options & GET_LAYOUT) {
145                 strcpy(base, "layout");
146                 if (load_sys(fname, buf))
147                         goto abort;
148                 sra->array.layout = strtoul(buf, NULL, 0);
149         }
150         if (options & GET_DISKS) {
151                 strcpy(base, "raid_disks");
152                 if (load_sys(fname, buf))
153                         goto abort;
154                 sra->array.raid_disks = strtoul(buf, NULL, 0);
155         }
156         if (options & GET_COMPONENT) {
157                 strcpy(base, "component_size");
158                 if (load_sys(fname, buf))
159                         goto abort;
160                 sra->component_size = strtoull(buf, NULL, 0);
161                 /* sysfs reports "K", but we want sectors */
162                 sra->component_size *= 2;
163         }
164         if (options & GET_CHUNK) {
165                 strcpy(base, "chunk_size");
166                 if (load_sys(fname, buf))
167                         goto abort;
168                 sra->array.chunk_size = strtoul(buf, NULL, 0);
169         }
170         if (options & GET_CACHE) {
171                 strcpy(base, "stripe_cache_size");
172                 if (load_sys(fname, buf))
173                         goto abort;
174                 sra->cache_size = strtoul(buf, NULL, 0);
175         }
176         if (options & GET_MISMATCH) {
177                 strcpy(base, "mismatch_cnt");
178                 if (load_sys(fname, buf))
179                         goto abort;
180                 sra->mismatch_cnt = strtoul(buf, NULL, 0);
181         }
182
183         if (! (options & GET_DEVS))
184                 return sra;
185
186         /* Get all the devices as well */
187         *base = 0;
188         dir = opendir(fname);
189         if (!dir)
190                 goto abort;
191         sra->array.spare_disks = 0;
192
193         while ((de = readdir(dir)) != NULL) {
194                 char *ep;
195                 if (de->d_ino == 0 ||
196                     strncmp(de->d_name, "dev-", 4) != 0)
197                         continue;
198                 strcpy(base, de->d_name);
199                 dbase = base + strlen(base);
200                 *dbase++ = '/';
201
202                 dev = malloc(sizeof(*dev));
203                 if (!dev)
204                         goto abort;
205                 dev->next = sra->devs;
206                 sra->devs = dev;
207                 strcpy(dev->sys_name, de->d_name);
208
209                 /* Always get slot, major, minor */
210                 strcpy(dbase, "slot");
211                 if (load_sys(fname, buf))
212                         goto abort;
213                 dev->disk.raid_disk = strtoul(buf, &ep, 10);
214                 if (*ep) dev->disk.raid_disk = -1;
215
216                 strcpy(dbase, "block/dev");
217                 if (load_sys(fname, buf))
218                         goto abort;
219                 sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
220
221                 if (options & GET_OFFSET) {
222                         strcpy(dbase, "offset");
223                         if (load_sys(fname, buf))
224                                 goto abort;
225                         dev->data_offset = strtoull(buf, NULL, 0);
226                 }
227                 if (options & GET_SIZE) {
228                         strcpy(dbase, "size");
229                         if (load_sys(fname, buf))
230                                 goto abort;
231                         dev->component_size = strtoull(buf, NULL, 0) * 2;
232                 }
233                 if (options & GET_STATE) {
234                         dev->disk.state = 0;
235                         strcpy(dbase, "state");
236                         if (load_sys(fname, buf))
237                                 goto abort;
238                         if (strstr(buf, "in_sync"))
239                                 dev->disk.state |= (1<<MD_DISK_SYNC);
240                         if (strstr(buf, "faulty"))
241                                 dev->disk.state |= (1<<MD_DISK_FAULTY);
242                         if (dev->disk.state == 0)
243                                 sra->array.spare_disks++;
244                 }
245                 if (options & GET_ERROR) {
246                         strcpy(buf, "errors");
247                         if (load_sys(fname, buf))
248                                 goto abort;
249                         dev->errors = strtoul(buf, NULL, 0);
250                 }
251         }
252         closedir(dir);
253         return sra;
254
255  abort:
256         if (dir)
257                 closedir(dir);
258         sysfs_free(sra);
259         return NULL;
260 }
261
262 unsigned long long get_component_size(int fd)
263 {
264         /* Find out the component size of the array.
265          * We cannot trust GET_ARRAY_INFO ioctl as it's
266          * size field is only 32bits.
267          * So look in /sys/block/mdXXX/md/component_size
268          *
269          * This returns in units of sectors.
270          */
271         struct stat stb;
272         char fname[50];
273         int n;
274         if (fstat(fd, &stb)) return 0;
275         if (major(stb.st_rdev) == 9)
276                 sprintf(fname, "/sys/block/md%d/md/component_size",
277                         (int)minor(stb.st_rdev));
278         else
279                 sprintf(fname, "/sys/block/md_d%d/md/component_size",
280                         (int)minor(stb.st_rdev)>>MdpMinorShift);
281         fd = open(fname, O_RDONLY);
282         if (fd < 0)
283                 return 0;
284         n = read(fd, fname, sizeof(fname));
285         close(fd);
286         if (n == sizeof(fname))
287                 return 0;
288         fname[n] = 0;
289         return strtoull(fname, NULL, 10) * 2;
290 }
291
292 int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
293                   char *name, char *val)
294 {
295         char fname[50];
296         int n;
297         int fd;
298
299         sprintf(fname, "/sys/block/%s/md/%s/%s",
300                 sra->sys_name, dev?dev->sys_name:"", name);
301         fd = open(fname, O_WRONLY);
302         if (fd < 0)
303                 return -1;
304         n = write(fd, val, strlen(val));
305         close(fd);
306         if (n != strlen(val))
307                 return -1;
308         return 0;
309 }
310
/*
 * Write 'val', formatted as an unsigned decimal number, to the sysfs
 * attribute 'name' of 'sra' (or of 'dev' within it when dev != NULL).
 * Returns whatever sysfs_set_str() returns for the formatted text.
 */
int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
                  char *name, unsigned long long val)
{
        /* 50 bytes is ample for any unsigned long long in decimal */
        char digits[50];

        snprintf(digits, sizeof(digits), "%llu", val);
        return sysfs_set_str(sra, dev, name, digits);
}
318
319 int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
320                        char *name, unsigned long long *val)
321 {
322         char fname[50];
323         char buf[50];
324         int n;
325         int fd;
326         char *ep;
327         sprintf(fname, "/sys/block/%s/md/%s/%s",
328                 sra->sys_name, dev?dev->sys_name:"", name);
329         fd = open(fname, O_RDONLY);
330         if (fd < 0)
331                 return -1;
332         n = read(fd, buf, sizeof(buf));
333         close(fd);
334         if (n <= 0)
335                 return -1;
336         buf[n] = 0;
337         *val = strtoull(buf, &ep, 0);
338         if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
339                 return -1;
340         return 0;
341 }
342
343 int sysfs_set_array(struct mdinfo *sra,
344                     struct mdinfo *info)
345 {
346         int rv = 0;
347         sra->array = info->array;
348
349         if (info->array.level < 0)
350                 return 0; /* FIXME */
351         rv |= sysfs_set_str(sra, NULL, "level",
352                             map_num(pers, info->array.level));
353         rv |= sysfs_set_num(sra, NULL, "raid_disks", info->array.raid_disks);
354         rv |= sysfs_set_num(sra, NULL, "chunk_size", info->array.chunk_size);
355         rv |= sysfs_set_num(sra, NULL, "layout", info->array.layout);
356         rv |= sysfs_set_num(sra, NULL, "component_size", info->component_size/2);
357         rv |= sysfs_set_num(sra, NULL, "resync_start", info->resync_start);
358         sra->array = info->array;
359         return rv;
360 }
361
362 int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
363 {
364         char dv[100];
365         char nm[100];
366         struct mdinfo *sd2;
367         char *dname;
368         int rv;
369
370         sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
371         rv = sysfs_set_str(sra, NULL, "new_dev", dv);
372         if (rv)
373                 return rv;
374
375         memset(nm, 0, sizeof(nm));
376         sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor);
377         rv = readlink(dv, nm, sizeof(nm));
378         if (rv <= 0)
379                 return -1;
380         nm[rv] = '\0';
381         dname = strrchr(nm, '/');
382         if (dname) dname++;
383         strcpy(sd->sys_name, "dev-");
384         strcpy(sd->sys_name+4, dname);
385
386         rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
387         rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
388         if (sra->array.level != LEVEL_CONTAINER) {
389                 rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
390 //              rv |= sysfs_set_str(sra, sd, "state", "in_sync");
391         }
392         if (! rv) {
393                 sd2 = malloc(sizeof(*sd2));
394                 *sd2 = *sd;
395                 sd2->next = sra->devs;
396                 sra->devs = sd2;
397         }
398         return rv;
399 }
400
401 int sysfs_disk_to_sg(int fd)
402 {
403         /* from an open block device, try find and open its corresponding
404          * scsi_generic interface
405          */
406         struct stat st;
407         char path[256];
408         char sg_path[256];
409         char sg_major_minor[8];
410         char *c;
411         DIR *dir;
412         struct dirent *de;
413         int major, minor, rv;
414
415         if (fstat(fd, &st))
416                 return -1;
417
418         snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
419                  major(st.st_rdev), minor(st.st_rdev));
420
421         dir = opendir(path);
422         if (!dir)
423                 return -1;
424
425         de = readdir(dir);
426         while (de) {
427                 if (strncmp("scsi_generic:", de->d_name,
428                             strlen("scsi_generic:")) == 0)
429                         break;
430                 de = readdir(dir);
431         }
432         closedir(dir);
433
434         if (!de)
435                 return -1;
436
437         snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, de->d_name);
438         fd = open(sg_path, O_RDONLY);
439         if (fd < 0)
440                 return fd;
441
442         rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
443         close(fd);
444         if (rv < 0)
445                 return -1;
446         else
447                 sg_major_minor[rv - 1] = '\0';
448
449         c = strchr(sg_major_minor, ':');
450         *c = '\0';
451         c++;
452         major = strtol(sg_major_minor, NULL, 10);
453         minor = strtol(c, NULL, 10);
454         snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
455                  (int) getpid(), major, minor);
456         if (mknod(path, S_IFCHR|0600, makedev(major, minor))==0) {
457                         fd = open(path, O_RDONLY);
458                         unlink(path);
459                         return fd;
460         }
461
462         return -1;
463 }
464
465 int sysfs_disk_to_scsi_id(int fd, __u32 *id)
466 {
467         /* from an open block device, try to retrieve it scsi_id */
468         struct stat st;
469         char path[256];
470         char *c1, *c2;
471         DIR *dir;
472         struct dirent *de;
473
474         if (fstat(fd, &st))
475                 return 1;
476
477         snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
478                  major(st.st_rdev), minor(st.st_rdev));
479
480         dir = opendir(path);
481         if (!dir)
482                 return 1;
483
484         de = readdir(dir);
485         while (de) {
486                 if (strncmp("scsi_disk:", de->d_name,
487                             strlen("scsi_disk:")) == 0)
488                         break;
489                 de = readdir(dir);
490         }
491         closedir(dir);
492
493         if (!de)
494                 return 1;
495
496         c1 = strchr(de->d_name, ':');
497         c1++;
498         c2 = strchr(c1, ':');
499         *c2 = '\0';
500         *id = strtol(c1, NULL, 10) << 24; /* host */
501         c1 = c2 + 1;
502         c2 = strchr(c1, ':');
503         *c2 = '\0';
504         *id |= strtol(c1, NULL, 10) << 16; /* channel */
505         c1 = c2 + 1;
506         c2 = strchr(c1, ':');
507         *c2 = '\0';
508         *id |= strtol(c1, NULL, 10) << 8; /* lun */
509         c1 = c2 + 1;
510         *id |= strtol(c1, NULL, 10); /* id */
511
512         return 0;
513 }
514
515
516 int sysfs_unique_holder(int devnum, long rdev)
517 {
518         /* Check that devnum is a holder of rdev,
519          * and is the only holder.
520          * we should be locked against races by
521          * an O_EXCL on devnum
522          */
523         DIR *dir;
524         struct dirent *de;
525         char dirname[100];
526         char l;
527         int found = 0;
528         sprintf(dirname, "/sys/dev/block/%d:%d/holders",
529                 major(rdev), minor(rdev));
530         dir = opendir(dirname);
531         errno = ENOENT;
532         if (!dir)
533                 return 0;
534         l = strlen(dirname);
535         while ((de = readdir(dir)) != NULL) {
536                 char buf[10];
537                 int n;
538                 int mj, mn;
539                 char c;
540                 int fd;
541
542                 if (de->d_ino == 0)
543                         continue;
544                 if (de->d_name[0] == '.')
545                         continue;
546                 strcpy(dirname+l, "/");
547                 strcat(dirname+l, de->d_name);
548                 strcat(dirname+l, "/dev");
549                 fd = open(dirname, O_RDONLY);
550                 if (fd < 0) {
551                         errno = ENOENT;
552                         break;
553                 }
554                 n = read(fd, buf, sizeof(buf)-1);
555                 close(fd);
556                 buf[n] = 0;
557                 if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
558                     c != '\n') {
559                         errno = ENOENT;
560                         break;
561                 }
562                 if (mj != MD_MAJOR)
563                         mn = -1-(mn>>6);
564
565                 if (devnum != mn) {
566                         errno = EEXIST;
567                         break;
568                 }
569                 found = 1;
570         }
571         closedir(dir);
572         if (de)
573                 return 0;
574         else
575                 return found;
576 }