sysfs: avoid possible data corruption in sys_load.
[thirdparty/mdadm.git] / sysfs.c
CommitLineData
e86c9dd6
NB
1/*
2 * sysfs - extract md related information from sysfs. Part of:
3 * mdadm - manage Linux "md" devices aka RAID arrays.
4 *
5 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
24 */
25
26#include "mdadm.h"
27#include <dirent.h>
28
/*
 * Read the file 'path' into 'buf' as a NUL-terminated string.
 *
 * A single read() of at most 1024 bytes is issued, so 'buf' must have
 * room for 1024 bytes.  One trailing newline, if present, is replaced
 * by a NUL.
 *
 * Returns 0 on success, or -1 if the file cannot be opened, the read
 * fails, or the read fills all 1024 bytes (leaving no room for the
 * terminator).
 */
int load_sys(char *path, char *buf)
{
	int len;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	len = read(fd, buf, 1024);
	close(fd);

	/* error, or a completely full buffer with no space for the NUL */
	if (len < 0 || len > 1023)
		return -1;

	buf[len] = 0;
	if (len > 0 && buf[len - 1] == '\n')
		buf[len - 1] = 0;

	return 0;
}
44
7e0f6979 45void sysfs_free(struct mdinfo *sra)
8382f19b 46{
7e0f6979
NB
47 while (sra) {
48 struct mdinfo *sra2 = sra->next;
49 while (sra->devs) {
50 struct mdinfo *d = sra->devs;
51 sra->devs = d->next;
52 free(d);
53 }
54 free(sra);
55 sra = sra2;
8382f19b 56 }
8382f19b
NB
57}
58
549e9569
NB
/*
 * Open a sysfs attribute of an md array.
 *
 * The path is /sys/block/<name>/md/<attr>, or
 * /sys/block/<name>/md/<devname>/<attr> when 'devname' is not NULL,
 * where <name> is whatever devnum2devname(devnum) returns (that helper
 * is defined elsewhere; its result is used as-is).
 *
 * The file is opened read-write; if that is refused with EACCES a
 * read-only open is attempted instead.
 *
 * Returns the file descriptor, or a negative value on failure.  A path
 * that does not fit the local buffer fails with errno = ENAMETOOLONG
 * rather than overrunning the buffer.
 */
int sysfs_open(int devnum, char *devname, char *attr)
{
	char fname[256];
	int len;
	int fd;

	if (devname)
		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
			       devnum2devname(devnum), devname, attr);
	else
		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s",
			       devnum2devname(devnum), attr);
	if (len < 0 || len >= (int)sizeof(fname)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(fname, O_RDWR);
	if (fd < 0 && errno == EACCES)
		fd = open(fname, O_RDONLY);
	return fd;
}
75
7e0f6979 76struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
e86c9dd6
NB
77{
78 /* Longest possible name in sysfs, mounted at /sys, is
79 * /sys/block/md_dXXX/md/dev-XXXXX/block/dev
80 * /sys/block/md_dXXX/md/metadata_version
81 * which is about 41 characters. 50 should do for now
82 */
83 char fname[50];
84 char buf[1024];
85 char *base;
86 char *dbase;
7e0f6979 87 struct mdinfo *sra;
06c7f68e 88 struct mdinfo *dev;
355726fa 89 DIR *dir = NULL;
e86c9dd6
NB
90 struct dirent *de;
91
92 sra = malloc(sizeof(*sra));
93 if (sra == NULL)
94 return sra;
7e0f6979 95 sra->next = NULL;
e86c9dd6
NB
96
97 if (fd >= 0) {
98 struct stat stb;
2faf1f5f
NB
99 mdu_version_t vers;
100 if (fstat(fd, &stb)) return NULL;
101 if (ioctl(fd, RAID_VERSION, &vers) != 0)
102 return NULL;
e86c9dd6 103 if (major(stb.st_rdev)==9)
ea24acd0 104 sprintf(sra->sys_name, "md%d", (int)minor(stb.st_rdev));
e86c9dd6 105 else
7e0f6979 106 sprintf(sra->sys_name, "md_d%d",
ea24acd0 107 (int)minor(stb.st_rdev)>>MdpMinorShift);
e86c9dd6
NB
108 } else {
109 if (devnum >= 0)
7e0f6979 110 sprintf(sra->sys_name, "md%d", devnum);
e86c9dd6 111 else
7e0f6979 112 sprintf(sra->sys_name, "md_d%d",
e86c9dd6
NB
113 -1-devnum);
114 }
7e0f6979 115 sprintf(fname, "/sys/block/%s/md/", sra->sys_name);
e86c9dd6
NB
116 base = fname + strlen(fname);
117
118 sra->devs = NULL;
8382f19b
NB
119 if (options & GET_VERSION) {
120 strcpy(base, "metadata_version");
121 if (load_sys(fname, buf))
122 goto abort;
294d6f45 123 if (strncmp(buf, "none", 4) == 0) {
7e0f6979
NB
124 sra->array.major_version =
125 sra->array.minor_version = -1;
294d6f45
NB
126 strcpy(sra->text_version, "");
127 } else if (strncmp(buf, "external:", 9) == 0) {
142cb9e1
NB
128 sra->array.major_version = -1;
129 sra->array.minor_version = -2;
130 strcpy(sra->text_version, buf+9);
b8ac1967 131 } else {
8382f19b 132 sscanf(buf, "%d.%d",
7e0f6979
NB
133 &sra->array.major_version,
134 &sra->array.minor_version);
b8ac1967
NB
135 strcpy(sra->text_version, buf);
136 }
8382f19b 137 }
e86c9dd6
NB
138 if (options & GET_LEVEL) {
139 strcpy(base, "level");
140 if (load_sys(fname, buf))
141 goto abort;
7e0f6979 142 sra->array.level = map_name(pers, buf);
e86c9dd6
NB
143 }
144 if (options & GET_LAYOUT) {
145 strcpy(base, "layout");
146 if (load_sys(fname, buf))
147 goto abort;
7e0f6979 148 sra->array.layout = strtoul(buf, NULL, 0);
e86c9dd6 149 }
549e9569
NB
150 if (options & GET_DISKS) {
151 strcpy(base, "raid_disks");
152 if (load_sys(fname, buf))
153 goto abort;
154 sra->array.raid_disks = strtoul(buf, NULL, 0);
155 }
e86c9dd6
NB
156 if (options & GET_COMPONENT) {
157 strcpy(base, "component_size");
158 if (load_sys(fname, buf))
159 goto abort;
160 sra->component_size = strtoull(buf, NULL, 0);
353632d9
NB
161 /* sysfs reports "K", but we want sectors */
162 sra->component_size *= 2;
e86c9dd6
NB
163 }
164 if (options & GET_CHUNK) {
165 strcpy(base, "chunk_size");
166 if (load_sys(fname, buf))
167 goto abort;
7e0f6979 168 sra->array.chunk_size = strtoul(buf, NULL, 0);
e86c9dd6 169 }
758d3a8e
NB
170 if (options & GET_CACHE) {
171 strcpy(base, "stripe_cache_size");
172 if (load_sys(fname, buf))
173 goto abort;
174 sra->cache_size = strtoul(buf, NULL, 0);
175 }
37dfc3d6
NB
176 if (options & GET_MISMATCH) {
177 strcpy(base, "mismatch_cnt");
178 if (load_sys(fname, buf))
179 goto abort;
180 sra->mismatch_cnt = strtoul(buf, NULL, 0);
181 }
e86c9dd6
NB
182
183 if (! (options & GET_DEVS))
184 return sra;
185
186 /* Get all the devices as well */
187 *base = 0;
188 dir = opendir(fname);
189 if (!dir)
190 goto abort;
7e0f6979 191 sra->array.spare_disks = 0;
e86c9dd6
NB
192
193 while ((de = readdir(dir)) != NULL) {
194 char *ep;
195 if (de->d_ino == 0 ||
196 strncmp(de->d_name, "dev-", 4) != 0)
197 continue;
198 strcpy(base, de->d_name);
199 dbase = base + strlen(base);
200 *dbase++ = '/';
201
202 dev = malloc(sizeof(*dev));
203 if (!dev)
204 goto abort;
205 dev->next = sra->devs;
206 sra->devs = dev;
06c7f68e 207 strcpy(dev->sys_name, de->d_name);
e86c9dd6
NB
208
209 /* Always get slot, major, minor */
210 strcpy(dbase, "slot");
211 if (load_sys(fname, buf))
212 goto abort;
06c7f68e
NB
213 dev->disk.raid_disk = strtoul(buf, &ep, 10);
214 if (*ep) dev->disk.raid_disk = -1;
e86c9dd6
NB
215
216 strcpy(dbase, "block/dev");
217 if (load_sys(fname, buf))
218 goto abort;
06c7f68e 219 sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
e86c9dd6
NB
220
221 if (options & GET_OFFSET) {
222 strcpy(dbase, "offset");
223 if (load_sys(fname, buf))
224 goto abort;
06c7f68e 225 dev->data_offset = strtoull(buf, NULL, 0);
e86c9dd6
NB
226 }
227 if (options & GET_SIZE) {
228 strcpy(dbase, "size");
229 if (load_sys(fname, buf))
230 goto abort;
047d2e49 231 dev->component_size = strtoull(buf, NULL, 0) * 2;
e86c9dd6
NB
232 }
233 if (options & GET_STATE) {
06c7f68e 234 dev->disk.state = 0;
e86c9dd6
NB
235 strcpy(dbase, "state");
236 if (load_sys(fname, buf))
237 goto abort;
238 if (strstr(buf, "in_sync"))
06c7f68e 239 dev->disk.state |= (1<<MD_DISK_SYNC);
e86c9dd6 240 if (strstr(buf, "faulty"))
06c7f68e
NB
241 dev->disk.state |= (1<<MD_DISK_FAULTY);
242 if (dev->disk.state == 0)
7e0f6979 243 sra->array.spare_disks++;
e86c9dd6
NB
244 }
245 if (options & GET_ERROR) {
246 strcpy(buf, "errors");
247 if (load_sys(fname, buf))
248 goto abort;
249 dev->errors = strtoul(buf, NULL, 0);
250 }
251 }
355726fa 252 closedir(dir);
e86c9dd6
NB
253 return sra;
254
255 abort:
355726fa
NB
256 if (dir)
257 closedir(dir);
8382f19b 258 sysfs_free(sra);
e86c9dd6
NB
259 return NULL;
260}
261
262unsigned long long get_component_size(int fd)
263{
264 /* Find out the component size of the array.
265 * We cannot trust GET_ARRAY_INFO ioctl as it's
266 * size field is only 32bits.
267 * So look in /sys/block/mdXXX/md/component_size
353632d9 268 *
8686f3ed 269 * This returns in units of sectors.
e86c9dd6
NB
270 */
271 struct stat stb;
272 char fname[50];
273 int n;
274 if (fstat(fd, &stb)) return 0;
275 if (major(stb.st_rdev) == 9)
276 sprintf(fname, "/sys/block/md%d/md/component_size",
ea24acd0 277 (int)minor(stb.st_rdev));
e86c9dd6
NB
278 else
279 sprintf(fname, "/sys/block/md_d%d/md/component_size",
ea24acd0 280 (int)minor(stb.st_rdev)>>MdpMinorShift);
e86c9dd6
NB
281 fd = open(fname, O_RDONLY);
282 if (fd < 0)
283 return 0;
284 n = read(fd, fname, sizeof(fname));
285 close(fd);
286 if (n == sizeof(fname))
287 return 0;
288 fname[n] = 0;
8686f3ed 289 return strtoull(fname, NULL, 10) * 2;
e86c9dd6
NB
290}
291
7e0f6979 292int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
e86c9dd6
NB
293 char *name, char *val)
294{
295 char fname[50];
296 int n;
297 int fd;
7e1432fb 298
e86c9dd6 299 sprintf(fname, "/sys/block/%s/md/%s/%s",
7e0f6979 300 sra->sys_name, dev?dev->sys_name:"", name);
e86c9dd6
NB
301 fd = open(fname, O_WRONLY);
302 if (fd < 0)
303 return -1;
304 n = write(fd, val, strlen(val));
305 close(fd);
306 if (n != strlen(val))
307 return -1;
308 return 0;
309}
310
/*
 * Write 'val', formatted as an unsigned decimal number, to the sysfs
 * attribute 'name' of array 'sra' (or of its member 'dev' when 'dev'
 * is not NULL).  The result is whatever sysfs_set_str() returns.
 */
int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
		  char *name, unsigned long long val)
{
	char digits[50];

	snprintf(digits, sizeof(digits), "%llu", val);
	return sysfs_set_str(sra, dev, name, digits);
}
318
7e0f6979 319int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
e86c9dd6
NB
320 char *name, unsigned long long *val)
321{
322 char fname[50];
323 char buf[50];
324 int n;
325 int fd;
326 char *ep;
327 sprintf(fname, "/sys/block/%s/md/%s/%s",
7e0f6979 328 sra->sys_name, dev?dev->sys_name:"", name);
e86c9dd6
NB
329 fd = open(fname, O_RDONLY);
330 if (fd < 0)
331 return -1;
332 n = read(fd, buf, sizeof(buf));
333 close(fd);
334 if (n <= 0)
335 return -1;
336 buf[n] = 0;
337 *val = strtoull(buf, &ep, 0);
338 if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
339 return -1;
340 return 0;
341}
2503d23b
NB
342
343int sysfs_set_array(struct mdinfo *sra,
344 struct mdinfo *info)
345{
346 int rv = 0;
347 sra->array = info->array;
2f6079dc 348
2503d23b
NB
349 if (info->array.level < 0)
350 return 0; /* FIXME */
351 rv |= sysfs_set_str(sra, NULL, "level",
352 map_num(pers, info->array.level));
353 rv |= sysfs_set_num(sra, NULL, "raid_disks", info->array.raid_disks);
354 rv |= sysfs_set_num(sra, NULL, "chunk_size", info->array.chunk_size);
355 rv |= sysfs_set_num(sra, NULL, "layout", info->array.layout);
047d2e49 356 rv |= sysfs_set_num(sra, NULL, "component_size", info->component_size/2);
0fd5c350 357 rv |= sysfs_set_num(sra, NULL, "resync_start", info->resync_start);
2503d23b
NB
358 sra->array = info->array;
359 return rv;
360}
361
2318b9f0 362int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
2503d23b
NB
363{
364 char dv[100];
365 char nm[100];
366 struct mdinfo *sd2;
367 char *dname;
368 int rv;
369
370 sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
371 rv = sysfs_set_str(sra, NULL, "new_dev", dv);
372 if (rv)
373 return rv;
374
375 memset(nm, 0, sizeof(nm));
376 sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor);
73649188
N
377 rv = readlink(dv, nm, sizeof(nm));
378 if (rv <= 0)
2503d23b 379 return -1;
73649188 380 nm[rv] = '\0';
2503d23b
NB
381 dname = strrchr(nm, '/');
382 if (dname) dname++;
383 strcpy(sd->sys_name, "dev-");
384 strcpy(sd->sys_name+4, dname);
385
73649188 386 rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
2503d23b
NB
387 rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
388 if (sra->array.level != LEVEL_CONTAINER) {
389 rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
390// rv |= sysfs_set_str(sra, sd, "state", "in_sync");
391 }
3cb07116
NB
392 if (! rv) {
393 sd2 = malloc(sizeof(*sd2));
394 *sd2 = *sd;
395 sd2->next = sra->devs;
396 sra->devs = sd2;
397 }
2503d23b
NB
398 return rv;
399}
90c8b707
DW
400
401int sysfs_disk_to_sg(int fd)
402{
403 /* from an open block device, try find and open its corresponding
404 * scsi_generic interface
405 */
406 struct stat st;
407 char path[256];
408 char sg_path[256];
409 char sg_major_minor[8];
410 char *c;
411 DIR *dir;
412 struct dirent *de;
413 int major, minor, rv;
414
415 if (fstat(fd, &st))
416 return -1;
417
418 snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
419 major(st.st_rdev), minor(st.st_rdev));
420
421 dir = opendir(path);
422 if (!dir)
423 return -1;
424
425 de = readdir(dir);
426 while (de) {
427 if (strncmp("scsi_generic:", de->d_name,
428 strlen("scsi_generic:")) == 0)
429 break;
430 de = readdir(dir);
431 }
432 closedir(dir);
433
434 if (!de)
435 return -1;
436
437 snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, de->d_name);
438 fd = open(sg_path, O_RDONLY);
439 if (fd < 0)
440 return fd;
441
442 rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
443 close(fd);
444 if (rv < 0)
445 return -1;
446 else
447 sg_major_minor[rv - 1] = '\0';
448
449 c = strchr(sg_major_minor, ':');
450 *c = '\0';
451 c++;
452 major = strtol(sg_major_minor, NULL, 10);
453 minor = strtol(c, NULL, 10);
454 snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
455 (int) getpid(), major, minor);
456 if (mknod(path, S_IFCHR|0600, makedev(major, minor))==0) {
457 fd = open(path, O_RDONLY);
458 unlink(path);
459 return fd;
460 }
461
462 return -1;
463}
464
f1665f72
DW
465int sysfs_disk_to_scsi_id(int fd, __u32 *id)
466{
467 /* from an open block device, try to retrieve it scsi_id */
468 struct stat st;
469 char path[256];
470 char *c1, *c2;
471 DIR *dir;
472 struct dirent *de;
473
474 if (fstat(fd, &st))
475 return 1;
476
477 snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
478 major(st.st_rdev), minor(st.st_rdev));
479
480 dir = opendir(path);
481 if (!dir)
482 return 1;
483
484 de = readdir(dir);
485 while (de) {
486 if (strncmp("scsi_disk:", de->d_name,
487 strlen("scsi_disk:")) == 0)
488 break;
489 de = readdir(dir);
490 }
491 closedir(dir);
492
493 if (!de)
494 return 1;
495
496 c1 = strchr(de->d_name, ':');
497 c1++;
498 c2 = strchr(c1, ':');
499 *c2 = '\0';
500 *id = strtol(c1, NULL, 10) << 24; /* host */
501 c1 = c2 + 1;
502 c2 = strchr(c1, ':');
503 *c2 = '\0';
504 *id |= strtol(c1, NULL, 10) << 16; /* channel */
505 c1 = c2 + 1;
506 c2 = strchr(c1, ':');
507 *c2 = '\0';
508 *id |= strtol(c1, NULL, 10) << 8; /* lun */
509 c1 = c2 + 1;
510 *id |= strtol(c1, NULL, 10); /* id */
511
512 return 0;
513}
f94d52f4
NB
514
515
516int sysfs_unique_holder(int devnum, long rdev)
517{
518 /* Check that devnum is a holder of rdev,
519 * and is the only holder.
520 * we should be locked against races by
521 * an O_EXCL on devnum
522 */
523 DIR *dir;
524 struct dirent *de;
525 char dirname[100];
526 char l;
527 int found = 0;
528 sprintf(dirname, "/sys/dev/block/%d:%d/holders",
529 major(rdev), minor(rdev));
530 dir = opendir(dirname);
531 errno = ENOENT;
532 if (!dir)
533 return 0;
534 l = strlen(dirname);
535 while ((de = readdir(dir)) != NULL) {
536 char buf[10];
537 int n;
538 int mj, mn;
539 char c;
540 int fd;
541
542 if (de->d_ino == 0)
543 continue;
544 if (de->d_name[0] == '.')
545 continue;
546 strcpy(dirname+l, "/");
547 strcat(dirname+l, de->d_name);
548 strcat(dirname+l, "/dev");
549 fd = open(dirname, O_RDONLY);
550 if (fd < 0) {
551 errno = ENOENT;
552 break;
553 }
554 n = read(fd, buf, sizeof(buf)-1);
555 close(fd);
556 buf[n] = 0;
557 if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
558 c != '\n') {
559 errno = ENOENT;
560 break;
561 }
562 if (mj != MD_MAJOR)
563 mn = -1-(mn>>6);
564
565 if (devnum != mn) {
566 errno = EEXIST;
567 break;
568 }
569 found = 1;
570 }
571 closedir(dir);
572 if (de)
573 return 0;
574 else
575 return found;
576}