]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
Manage_subdevs(): avoid leaking super
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include <ctype.h>
29
30 #define REGISTER_DEV _IO (MD_MAJOR, 1)
31 #define START_MD _IO (MD_MAJOR, 2)
32 #define STOP_MD _IO (MD_MAJOR, 3)
33
34 int Manage_ro(char *devname, int fd, int readonly)
35 {
36 /* switch to readonly or rw
37 *
38 * requires >= 0.90.0
39 * first check that array is runing
40 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
41 *
42 */
43 mdu_array_info_t array;
44 #ifndef MDASSEMBLE
45 struct mdinfo *mdi;
46 #endif
47
48 if (md_get_version(fd) < 9000) {
49 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
50 return 1;
51 }
52 #ifndef MDASSEMBLE
53 /* If this is an externally-manage array, we need to modify the
54 * metadata_version so that mdmon doesn't undo our change.
55 */
56 mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
57 if (mdi &&
58 mdi->array.major_version == -1 &&
59 is_subarray(mdi->text_version)) {
60 char vers[64];
61 strcpy(vers, "external:");
62 strcat(vers, mdi->text_version);
63 if (readonly > 0) {
64 int rv;
65 /* We set readonly ourselves. */
66 vers[9] = '-';
67 sysfs_set_str(mdi, NULL, "metadata_version", vers);
68
69 close(fd);
70 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
71
72 if (rv < 0) {
73 fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
74 devname, strerror(errno));
75
76 vers[9] = mdi->text_version[0];
77 sysfs_set_str(mdi, NULL, "metadata_version", vers);
78 return 1;
79 }
80 } else {
81 char *cp;
82 /* We cannot set read/write - must signal mdmon */
83 vers[9] = '/';
84 sysfs_set_str(mdi, NULL, "metadata_version", vers);
85
86 cp = strchr(vers+10, '/');
87 if (*cp)
88 *cp = 0;
89 ping_monitor(vers+10);
90 if (mdi->array.level <= 0)
91 sysfs_set_str(mdi, NULL, "array_state", "active");
92 }
93 return 0;
94 }
95 #endif
96 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
97 fprintf(stderr, Name ": %s does not appear to be active.\n",
98 devname);
99 return 1;
100 }
101
102 if (readonly>0) {
103 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
104 fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
105 devname, strerror(errno));
106 return 1;
107 }
108 } else if (readonly < 0) {
109 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
110 fprintf(stderr, Name ": failed to set writable for %s: %s\n",
111 devname, strerror(errno));
112 return 1;
113 }
114 }
115 return 0;
116 }
117
118 #ifndef MDASSEMBLE
119
/*
 * remove_devices - unlink alias names that point at a stopped array.
 *
 * devnum: md device number; values >= 0 map to "/dev/md<N>", negative
 *         values map to "/dev/md_d<-1-N>".
 * path:   alias name to examine; NULL means there is nothing to do.
 *
 * Removes 'path' and its partition variants 1..15 (suffix "p<N>" when
 * 'path' ends in a digit, otherwise "<N>"), but only when the name is a
 * symlink whose target is exactly the matching 'standard' name for
 * devnum.  These links were probably created by mdadm when the array
 * was assembled.
 */
static void remove_devices(int devnum, char *path)
{
	char base[40];
	char link[1024];
	char *path2;
	char *be;
	char *pe;
	int part;
	int n;

	if (!path)
		return;

	if (devnum >= 0)
		sprintf(base, "/dev/md%d", devnum);
	else
		sprintf(base, "/dev/md_d%d", -1-devnum);
	be = base + strlen(base);

	/* room for the name plus a "p<N>" partition suffix */
	path2 = malloc(strlen(path)+20);
	if (!path2)
		return;
	strcpy(path2, path);
	pe = path2 + strlen(path2);

	for (part = 0; part < 16; part++) {
		if (part) {
			sprintf(be, "p%d", part);

			/* Only look at the last character when there is one,
			 * and hand isdigit() an unsigned char value.
			 */
			if (pe > path2 && isdigit((unsigned char)pe[-1]))
				sprintf(pe, "p%d", part);
			else
				sprintf(pe, "%d", part);
		}
		/* readlink() does not NUL-terminate, hence the explicit
		 * length comparison before strncmp().
		 */
		n = readlink(path2, link, sizeof(link));
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
165
166
167 int Manage_runstop(char *devname, int fd, int runstop, int quiet)
168 {
169 /* Run or stop the array. array must already be configured
170 * required >= 0.90.0
171 * Only print failure messages if quiet == 0;
172 * quiet > 0 means really be quiet
173 * quiet < 0 means we will try again if it fails.
174 */
175 mdu_param_t param; /* unused */
176
177 if (runstop == -1 && md_get_version(fd) < 9000) {
178 if (ioctl(fd, STOP_MD, 0)) {
179 if (quiet == 0) fprintf(stderr,
180 Name ": stopping device %s "
181 "failed: %s\n",
182 devname, strerror(errno));
183 return 1;
184 }
185 }
186
187 if (md_get_version(fd) < 9000) {
188 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
189 return 1;
190 }
191 /*
192 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
193 fprintf(stderr, Name ": %s does not appear to be active.\n",
194 devname);
195 return 1;
196 }
197 */
198 if (runstop>0) {
199 if (ioctl(fd, RUN_ARRAY, &param)) {
200 fprintf(stderr, Name ": failed to run array %s: %s\n",
201 devname, strerror(errno));
202 return 1;
203 }
204 if (quiet <= 0)
205 fprintf(stderr, Name ": started %s\n", devname);
206 } else if (runstop < 0){
207 struct map_ent *map = NULL;
208 struct stat stb;
209 struct mdinfo *mdi;
210 int devnum;
211 int err;
212 int count;
213 /* If this is an mdmon managed array, just write 'inactive'
214 * to the array state and let mdmon clear up.
215 */
216 devnum = fd2devnum(fd);
217 /* Get EXCL access first. If this fails, then attempting
218 * to stop is probably a bad idea.
219 */
220 close(fd);
221 fd = open(devname, O_RDONLY|O_EXCL);
222 if (fd < 0 || fd2devnum(fd) != devnum) {
223 if (fd >= 0)
224 close(fd);
225 fprintf(stderr,
226 Name ": Cannot get exclusive access to %s:"
227 "Perhaps a running "
228 "process, mounted filesystem "
229 "or active volume group?\n",
230 devname);
231 return 1;
232 }
233 mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
234 if (mdi &&
235 mdi->array.level > 0 &&
236 is_subarray(mdi->text_version)) {
237 int err;
238 /* This is mdmon managed. */
239 close(fd);
240
241 count = 25;
242 while (count &&
243 (err = sysfs_set_str(mdi, NULL,
244 "array_state",
245 "inactive")) < 0
246 && errno == EBUSY) {
247 usleep(200000);
248 count--;
249 }
250 if (err && !quiet) {
251 fprintf(stderr, Name
252 ": failed to stop array %s: %s\n",
253 devname, strerror(errno));
254 return 1;
255 }
256
257 /* Give monitor a chance to act */
258 ping_monitor(mdi->text_version);
259
260 fd = open_dev_excl(devnum);
261 if (fd < 0) {
262 fprintf(stderr, Name
263 ": failed to completely stop %s"
264 ": Device is busy\n",
265 devname);
266 return 1;
267 }
268 } else if (mdi &&
269 mdi->array.major_version == -1 &&
270 mdi->array.minor_version == -2 &&
271 !is_subarray(mdi->text_version)) {
272 struct mdstat_ent *mds, *m;
273 /* container, possibly mdmon-managed.
274 * Make sure mdmon isn't opening it, which
275 * would interfere with the 'stop'
276 */
277 ping_monitor(mdi->sys_name);
278
279 /* now check that there are no existing arrays
280 * which are members of this array
281 */
282 mds = mdstat_read(0, 0);
283 for (m=mds; m; m=m->next)
284 if (m->metadata_version &&
285 strncmp(m->metadata_version, "external:", 9)==0 &&
286 is_subarray(m->metadata_version+9) &&
287 devname2devnum(m->metadata_version+10) == devnum) {
288 if (!quiet)
289 fprintf(stderr, Name
290 ": Cannot stop container %s: "
291 "member %s still active\n",
292 devname, m->dev);
293 free_mdstat(mds);
294 if (mdi)
295 sysfs_free(mdi);
296 return 1;
297 }
298 }
299
300 /* As we have an O_EXCL open, any use of the device
301 * which blocks STOP_ARRAY is probably a transient use,
302 * so it is reasonable to retry for a while - 5 seconds.
303 */
304 count = 25; err = 0;
305 while (count && fd >= 0
306 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
307 && errno == EBUSY) {
308 usleep(200000);
309 count --;
310 }
311 if (fd >= 0 && err) {
312 if (quiet == 0) {
313 fprintf(stderr, Name
314 ": failed to stop array %s: %s\n",
315 devname, strerror(errno));
316 if (errno == EBUSY)
317 fprintf(stderr, "Perhaps a running "
318 "process, mounted filesystem "
319 "or active volume group?\n");
320 }
321 if (mdi)
322 sysfs_free(mdi);
323 return 1;
324 }
325 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
326 * was stopped, so We'll do it here just to be sure. Drop any
327 * partitions as well...
328 */
329 if (fd >= 0)
330 ioctl(fd, BLKRRPART, 0);
331 if (mdi)
332 sysfs_uevent(mdi, "change");
333
334
335 if (devnum != NoMdDev &&
336 (stat("/dev/.udev", &stb) != 0 ||
337 check_env("MDADM_NO_UDEV"))) {
338 struct map_ent *mp = map_by_devnum(&map, devnum);
339 remove_devices(devnum, mp ? mp->path : NULL);
340 }
341
342
343 if (quiet <= 0)
344 fprintf(stderr, Name ": stopped %s\n", devname);
345 map_lock(&map);
346 map_remove(&map, devnum);
347 map_unlock(&map);
348 }
349 return 0;
350 }
351
352 int Manage_resize(char *devname, int fd, long long size, int raid_disks)
353 {
354 mdu_array_info_t info;
355 if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
356 fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
357 devname, strerror(errno));
358 return 1;
359 }
360 if (size >= 0)
361 info.size = size;
362 if (raid_disks > 0)
363 info.raid_disks = raid_disks;
364 if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
365 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
366 devname, strerror(errno));
367 return 1;
368 }
369 return 0;
370 }
371
372 int Manage_subdevs(char *devname, int fd,
373 struct mddev_dev *devlist, int verbose, int test,
374 char *update, int force)
375 {
376 /* do something to each dev.
377 * devmode can be
378 * 'a' - add the device
379 * try HOT_ADD_DISK
380 * If that fails EINVAL, try ADD_NEW_DISK
381 * 'r' - remove the device HOT_REMOVE_DISK
382 * device can be 'faulty' or 'detached' in which case all
383 * matching devices are removed.
384 * 'f' - set the device faulty SET_DISK_FAULTY
385 * device can be 'detached' in which case any device that
386 * is inaccessible will be marked faulty.
387 * For 'f' and 'r', the device can also be a kernel-internal
388 * name such as 'sdb'.
389 */
390 struct mddev_dev *add_devlist = NULL;
391 mdu_array_info_t array;
392 mdu_disk_info_t disc;
393 unsigned long long array_size;
394 struct mddev_dev *dv, *next = NULL;
395 struct stat stb;
396 int j, jnext = 0;
397 int tfd = -1;
398 struct supertype *st, *tst;
399 char *subarray = NULL;
400 int duuid[4];
401 int ouuid[4];
402 int lfd = -1;
403 int sysfd = -1;
404 int count = 0; /* number of actions taken */
405
406 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
407 fprintf(stderr, Name ": cannot get array info for %s\n",
408 devname);
409 return 1;
410 }
411
412 /* array.size is only 32 bit and may be truncated.
413 * So read from sysfs if possible, and record number of sectors
414 */
415
416 array_size = get_component_size(fd);
417 if (array_size <= 0)
418 array_size = array.size * 2;
419
420 tst = super_by_fd(fd, &subarray);
421 if (!tst) {
422 fprintf(stderr, Name ": unsupport array - version %d.%d\n",
423 array.major_version, array.minor_version);
424 return 1;
425 }
426
427 stb.st_rdev = 0;
428 for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
429 unsigned long long ldsize;
430 char dvname[20];
431 char *dnprintable = dv->devname;
432 char *add_dev = dv->devname;
433 int err;
434 int re_add_failed = 0;
435
436 next = dv->next;
437 jnext = 0;
438
439 if (strcmp(dv->devname, "failed")==0 ||
440 strcmp(dv->devname, "faulty")==0) {
441 int remaining_disks = array.nr_disks;
442 if (dv->disposition != 'r') {
443 fprintf(stderr, Name ": %s only meaningful "
444 "with -r, not -%c\n",
445 dv->devname, dv->disposition);
446 return 1;
447 }
448 for (; j < 1024 && remaining_disks > 0; j++) {
449 unsigned dev;
450 disc.number = j;
451 if (ioctl(fd, GET_DISK_INFO, &disc))
452 continue;
453 if (disc.major == 0 && disc.minor == 0)
454 continue;
455 remaining_disks --;
456 if ((disc.state & 1) == 0) /* faulty */
457 continue;
458 dev = makedev(disc.major, disc.minor);
459 if (stb.st_rdev == dev)
460 /* already did that one */
461 continue;
462 stb.st_rdev = dev;
463 next = dv;
464 /* same slot again next time - things might
465 * have reshuffled */
466 jnext = j;
467 sprintf(dvname,"%d:%d", disc.major, disc.minor);
468 dnprintable = dvname;
469 break;
470 }
471 if (next != dv)
472 continue;
473 } else if (strcmp(dv->devname, "detached") == 0) {
474 int remaining_disks = array.nr_disks;
475 if (dv->disposition != 'r' && dv->disposition != 'f') {
476 fprintf(stderr, Name ": %s only meaningful "
477 "with -r of -f, not -%c\n",
478 dv->devname, dv->disposition);
479 return 1;
480 }
481 for (; j < 1024 && remaining_disks > 0; j++) {
482 int sfd;
483 unsigned dev;
484 disc.number = j;
485 if (ioctl(fd, GET_DISK_INFO, &disc))
486 continue;
487 if (disc.major == 0 && disc.minor == 0)
488 continue;
489 remaining_disks --;
490 sprintf(dvname,"%d:%d", disc.major, disc.minor);
491 sfd = dev_open(dvname, O_RDONLY);
492 if (sfd >= 0) {
493 close(sfd);
494 continue;
495 }
496 if (dv->disposition == 'f' &&
497 (disc.state & 1) == 1) /* already faulty */
498 continue;
499 if (errno != ENXIO)
500 continue;
501 dev = makedev(disc.major, disc.minor);
502 if (stb.st_rdev == dev)
503 /* already did that one */
504 continue;
505 stb.st_rdev = dev;
506 next = dv;
507 /* same slot again next time - things might
508 * have reshuffled */
509 jnext = j;
510 dnprintable = dvname;
511 break;
512 }
513 if (next != dv)
514 continue;
515 } else if (strcmp(dv->devname, "missing") == 0) {
516 if (dv->disposition != 'a' || dv->re_add == 0) {
517 fprintf(stderr, Name ": 'missing' only meaningful "
518 "with --re-add\n");
519 return 1;
520 }
521 if (add_devlist == NULL)
522 add_devlist = conf_get_devs();
523 if (add_devlist == NULL) {
524 fprintf(stderr, Name ": no devices to scan for missing members.");
525 continue;
526 }
527 add_dev = add_devlist->devname;
528 add_devlist = add_devlist->next;
529 if (add_devlist != NULL)
530 next = dv;
531 if (stat(add_dev, &stb) < 0)
532 continue;
533 } else if (strchr(dv->devname, '/') == NULL &&
534 strchr(dv->devname, ':') == NULL &&
535 strlen(dv->devname) < 50) {
536 /* Assume this is a kernel-internal name like 'sda1' */
537 int found = 0;
538 char dname[55];
539 if (dv->disposition != 'r' && dv->disposition != 'f') {
540 fprintf(stderr, Name ": %s only meaningful "
541 "with -r or -f, not -%c\n",
542 dv->devname, dv->disposition);
543 return 1;
544 }
545
546 sprintf(dname, "dev-%s", dv->devname);
547 sysfd = sysfs_open(fd2devnum(fd), dname, "block/dev");
548 if (sysfd >= 0) {
549 char dn[20];
550 int mj,mn;
551 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
552 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
553 stb.st_rdev = makedev(mj,mn);
554 found = 1;
555 }
556 close(sysfd);
557 sysfd = -1;
558 }
559 if (!found) {
560 sysfd = sysfs_open(fd2devnum(fd), dname, "state");
561 if (sysfd < 0) {
562 fprintf(stderr, Name ": %s does not appear "
563 "to be a component of %s\n",
564 dv->devname, devname);
565 return 1;
566 }
567 }
568 } else {
569 j = 0;
570
571 tfd = dev_open(dv->devname, O_RDONLY);
572 if (tfd < 0 && dv->disposition == 'r' &&
573 lstat(dv->devname, &stb) == 0)
574 /* Be happy, the lstat worked, that is
575 * enough for --remove
576 */
577 ;
578 else {
579 if (tfd < 0 || fstat(tfd, &stb) != 0) {
580 fprintf(stderr, Name ": cannot find %s: %s\n",
581 dv->devname, strerror(errno));
582 if (tfd >= 0)
583 close(tfd);
584 return 1;
585 }
586 close(tfd);
587 tfd = -1;
588 }
589 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
590 fprintf(stderr, Name ": %s is not a "
591 "block device.\n",
592 dv->devname);
593 return 1;
594 }
595 }
596 switch(dv->disposition){
597 default:
598 fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
599 dv->devname, dv->disposition);
600 return 1;
601 case 'a':
602 /* add the device */
603 if (subarray) {
604 fprintf(stderr, Name ": Cannot add disks to a"
605 " \'member\' array, perform this"
606 " operation on the parent container\n");
607 return 1;
608 }
609 /* Make sure it isn't in use (in 2.6 or later) */
610 tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
611 if (tfd < 0 && add_dev != dv->devname)
612 continue;
613 if (tfd < 0) {
614 fprintf(stderr, Name ": Cannot open %s: %s\n",
615 dv->devname, strerror(errno));
616 return 1;
617 }
618
619 st = dup_super(tst);
620
621 if (array.not_persistent==0)
622 st->ss->load_super(st, tfd, NULL);
623
624 if (add_dev == dv->devname) {
625 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
626 st->ss->free_super(st);
627 close(tfd);
628 return 1;
629 }
630 } else if (!get_dev_size(tfd, NULL, &ldsize)) {
631 st->ss->free_super(st);
632 close(tfd);
633 tfd = -1;
634 continue;
635 }
636
637 if (tst->ss->validate_geometry(
638 tst, array.level, array.layout,
639 array.raid_disks, NULL,
640 ldsize >> 9, NULL, NULL, 0) == 0) {
641 if (!force) {
642 fprintf(stderr, Name
643 ": %s is larger than %s can "
644 "effectively use.\n"
645 " Add --force is you "
646 "really wan to add this device.\n",
647 add_dev, devname);
648 st->ss->free_super(st);
649 close(tfd);
650 return 1;
651 }
652 fprintf(stderr, Name
653 ": %s is larger than %s can "
654 "effectively use.\n"
655 " Adding anyway as --force "
656 "was given.\n",
657 add_dev, devname);
658 }
659 if (!tst->ss->external &&
660 array.major_version == 0 &&
661 md_get_version(fd)%100 < 2) {
662 close(tfd);
663 st->ss->free_super(st);
664 tfd = -1;
665 if (ioctl(fd, HOT_ADD_DISK,
666 (unsigned long)stb.st_rdev)==0) {
667 if (verbose >= 0)
668 fprintf(stderr, Name ": hot added %s\n",
669 add_dev);
670 continue;
671 }
672
673 fprintf(stderr, Name ": hot add failed for %s: %s\n",
674 add_dev, strerror(errno));
675 return 1;
676 }
677
678 if (array.not_persistent == 0 || tst->ss->external) {
679
680 /* need to find a sample superblock to copy, and
681 * a spare slot to use.
682 * For 'external' array (well, container based),
683 * We can just load the metadata for the array.
684 */
685 if (tst->sb)
686 /* already loaded */;
687 else if (tst->ss->external) {
688 tst->ss->load_container(tst, fd, NULL);
689 } else for (j = 0; j < tst->max_devs; j++) {
690 char *dev;
691 int dfd;
692 disc.number = j;
693 if (ioctl(fd, GET_DISK_INFO, &disc))
694 continue;
695 if (disc.major==0 && disc.minor==0)
696 continue;
697 if ((disc.state & 4)==0) continue; /* sync */
698 /* Looks like a good device to try */
699 dev = map_dev(disc.major, disc.minor, 1);
700 if (!dev) continue;
701 dfd = dev_open(dev, O_RDONLY);
702 if (dfd < 0) continue;
703 if (tst->ss->load_super(tst, dfd,
704 NULL)) {
705 close(dfd);
706 continue;
707 }
708 close(dfd);
709 break;
710 }
711 /* FIXME this is a bad test to be using */
712 if (!tst->sb) {
713 close(tfd);
714 st->ss->free_super(st);
715 fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
716 return 1;
717 }
718
719 /* Make sure device is large enough */
720 if (tst->ss->avail_size(tst, ldsize/512) <
721 array_size) {
722 close(tfd);
723 tfd = -1;
724 st->ss->free_super(st);
725 if (add_dev != dv->devname)
726 continue;
727 fprintf(stderr, Name ": %s not large enough to join array\n",
728 dv->devname);
729 return 1;
730 }
731
732 /* Possibly this device was recently part of the array
733 * and was temporarily removed, and is now being re-added.
734 * If so, we can simply re-add it.
735 */
736 tst->ss->uuid_from_super(tst, duuid);
737
738 if (st->sb) {
739 struct mdinfo mdi;
740 st->ss->getinfo_super(st, &mdi, NULL);
741 st->ss->uuid_from_super(st, ouuid);
742 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
743 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
744 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
745 /* look like it is worth a try. Need to
746 * make sure kernel will accept it though.
747 */
748 /* re-add doesn't work for version-1 superblocks
749 * before 2.6.18 :-(
750 */
751 if (array.major_version == 1 &&
752 get_linux_version() <= 2006018)
753 goto skip_re_add;
754 disc.number = mdi.disk.number;
755 if (ioctl(fd, GET_DISK_INFO, &disc) != 0
756 || disc.major != 0 || disc.minor != 0
757 || !enough_fd(fd))
758 goto skip_re_add;
759 disc.major = major(stb.st_rdev);
760 disc.minor = minor(stb.st_rdev);
761 disc.number = mdi.disk.number;
762 disc.raid_disk = mdi.disk.raid_disk;
763 disc.state = mdi.disk.state;
764 if (dv->writemostly == 1)
765 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
766 if (dv->writemostly == 2)
767 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
768 remove_partitions(tfd);
769 close(tfd);
770 tfd = -1;
771 if (update || dv->writemostly > 0) {
772 int rv = -1;
773 tfd = dev_open(dv->devname, O_RDWR);
774 if (tfd < 0) {
775 fprintf(stderr, Name ": failed to open %s for"
776 " superblock update during re-add\n", dv->devname);
777 st->ss->free_super(st);
778 return 1;
779 }
780
781 if (dv->writemostly == 1)
782 rv = st->ss->update_super(
783 st, NULL, "writemostly",
784 devname, verbose, 0, NULL);
785 if (dv->writemostly == 2)
786 rv = st->ss->update_super(
787 st, NULL, "readwrite",
788 devname, verbose, 0, NULL);
789 if (update)
790 rv = st->ss->update_super(
791 st, NULL, update,
792 devname, verbose, 0, NULL);
793 if (rv == 0)
794 rv = st->ss->store_super(st, tfd);
795 close(tfd);
796 tfd = -1;
797 if (rv != 0) {
798 fprintf(stderr, Name ": failed to update"
799 " superblock during re-add\n");
800 st->ss->free_super(st);
801 return 1;
802 }
803 }
804 /* don't even try if disk is marked as faulty */
805 errno = 0;
806 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
807 if (verbose >= 0)
808 fprintf(stderr, Name ": re-added %s\n", add_dev);
809 count++;
810 st->ss->free_super(st);
811 continue;
812 }
813 if (errno == ENOMEM || errno == EROFS) {
814 fprintf(stderr, Name ": add new device failed for %s: %s\n",
815 add_dev, strerror(errno));
816 st->ss->free_super(st);
817 if (add_dev != dv->devname)
818 continue;
819 return 1;
820 }
821 skip_re_add:
822 re_add_failed = 1;
823 }
824 st->ss->free_super(st);
825 }
826 if (add_dev != dv->devname) {
827 if (verbose > 0)
828 fprintf(stderr, Name
829 ": --re-add for %s to %s is not possible\n",
830 add_dev, devname);
831 if (tfd >= 0) {
832 close(tfd);
833 tfd = -1;
834 }
835 continue;
836 }
837 if (dv->re_add) {
838 if (tfd >= 0)
839 close(tfd);
840 fprintf(stderr, Name
841 ": --re-add for %s to %s is not possible\n",
842 dv->devname, devname);
843 return 1;
844 }
845 if (re_add_failed) {
846 fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
847 dv->devname, devname);
848 fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
849 dv->devname);
850 fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",
851 dv->devname);
852 if (tfd >= 0)
853 close(tfd);
854 return 1;
855 }
856 } else {
857 /* non-persistent. Must ensure that new drive
858 * is at least array.size big.
859 */
860 if (ldsize/512 < array_size) {
861 fprintf(stderr, Name ": %s not large enough to join array\n",
862 dv->devname);
863 if (tfd >= 0)
864 close(tfd);
865 return 1;
866 }
867 }
868 /* committed to really trying this device now*/
869 if (tfd >= 0) {
870 remove_partitions(tfd);
871 close(tfd);
872 tfd = -1;
873 }
874 /* in 2.6.17 and earlier, version-1 superblocks won't
875 * use the number we write, but will choose a free number.
876 * we must choose the same free number, which requires
877 * starting at 'raid_disks' and counting up
878 */
879 for (j = array.raid_disks; j< tst->max_devs; j++) {
880 disc.number = j;
881 if (ioctl(fd, GET_DISK_INFO, &disc))
882 break;
883 if (disc.major==0 && disc.minor==0)
884 break;
885 if (disc.state & 8) /* removed */
886 break;
887 }
888 disc.major = major(stb.st_rdev);
889 disc.minor = minor(stb.st_rdev);
890 disc.number =j;
891 disc.state = 0;
892 if (array.not_persistent==0) {
893 int dfd;
894 if (dv->writemostly == 1)
895 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
896 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
897 if (tst->ss->add_to_super(tst, &disc, dfd,
898 dv->devname)) {
899 close(dfd);
900 return 1;
901 }
902 if (tst->ss->write_init_super(tst)) {
903 close(dfd);
904 return 1;
905 }
906 } else if (dv->re_add) {
907 /* this had better be raid1.
908 * As we are "--re-add"ing we must find a spare slot
909 * to fill.
910 */
911 char *used = malloc(array.raid_disks);
912 memset(used, 0, array.raid_disks);
913 for (j=0; j< tst->max_devs; j++) {
914 mdu_disk_info_t disc2;
915 disc2.number = j;
916 if (ioctl(fd, GET_DISK_INFO, &disc2))
917 continue;
918 if (disc2.major==0 && disc2.minor==0)
919 continue;
920 if (disc2.state & 8) /* removed */
921 continue;
922 if (disc2.raid_disk < 0)
923 continue;
924 if (disc2.raid_disk > array.raid_disks)
925 continue;
926 used[disc2.raid_disk] = 1;
927 }
928 for (j=0 ; j<array.raid_disks; j++)
929 if (!used[j]) {
930 disc.raid_disk = j;
931 disc.state |= (1<<MD_DISK_SYNC);
932 break;
933 }
934 free(used);
935 }
936 if (dv->writemostly == 1)
937 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
938 if (tst->ss->external) {
939 /* add a disk
940 * to an external metadata container */
941 struct mdinfo new_mdi;
942 struct mdinfo *sra;
943 int container_fd;
944 int devnum = fd2devnum(fd);
945 int dfd;
946
947 container_fd = open_dev_excl(devnum);
948 if (container_fd < 0) {
949 fprintf(stderr, Name ": add failed for %s:"
950 " could not get exclusive access to container\n",
951 dv->devname);
952 tst->ss->free_super(tst);
953 return 1;
954 }
955
956 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
957 if (mdmon_running(tst->container_dev))
958 tst->update_tail = &tst->updates;
959 if (tst->ss->add_to_super(tst, &disc, dfd,
960 dv->devname)) {
961 close(dfd);
962 close(container_fd);
963 return 1;
964 }
965 if (tst->update_tail)
966 flush_metadata_updates(tst);
967 else
968 tst->ss->sync_metadata(tst);
969
970 sra = sysfs_read(container_fd, -1, 0);
971 if (!sra) {
972 fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
973 dv->devname);
974 close(container_fd);
975 tst->ss->free_super(tst);
976 return 1;
977 }
978 sra->array.level = LEVEL_CONTAINER;
979 /* Need to set data_offset and component_size */
980 tst->ss->getinfo_super(tst, &new_mdi, NULL);
981 new_mdi.disk.major = disc.major;
982 new_mdi.disk.minor = disc.minor;
983 new_mdi.recovery_start = 0;
984 /* Make sure fds are closed as they are O_EXCL which
985 * would block add_disk */
986 tst->ss->free_super(tst);
987 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
988 fprintf(stderr, Name ": add new device to external metadata"
989 " failed for %s\n", dv->devname);
990 close(container_fd);
991 sysfs_free(sra);
992 return 1;
993 }
994 ping_monitor_by_id(devnum);
995 sysfs_free(sra);
996 close(container_fd);
997 } else {
998 tst->ss->free_super(tst);
999 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
1000 fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
1001 dv->devname, j, strerror(errno));
1002 return 1;
1003 }
1004 }
1005 if (verbose >= 0)
1006 fprintf(stderr, Name ": added %s\n", dv->devname);
1007 break;
1008
1009 case 'r':
1010 /* hot remove */
1011 if (subarray) {
1012 fprintf(stderr, Name ": Cannot remove disks from a"
1013 " \'member\' array, perform this"
1014 " operation on the parent container\n");
1015 if (sysfd >= 0)
1016 close(sysfd);
1017 return 1;
1018 }
1019 if (tst->ss->external) {
1020 /* To remove a device from a container, we must
1021 * check that it isn't in use in an array.
1022 * This involves looking in the 'holders'
1023 * directory - there must be just one entry,
1024 * the container.
1025 * To ensure that it doesn't get used as a
1026 * hold spare while we are checking, we
1027 * get an O_EXCL open on the container
1028 */
1029 int dnum = fd2devnum(fd);
1030 lfd = open_dev_excl(dnum);
1031 if (lfd < 0) {
1032 fprintf(stderr, Name
1033 ": Cannot get exclusive access "
1034 " to container - odd\n");
1035 if (sysfd >= 0)
1036 close(sysfd);
1037 return 1;
1038 }
1039 /* in the detached case it is not possible to
1040 * check if we are the unique holder, so just
1041 * rely on the 'detached' checks
1042 */
1043 if (strcmp(dv->devname, "detached") == 0 ||
1044 sysfd >= 0 ||
1045 sysfs_unique_holder(dnum, stb.st_rdev))
1046 /* pass */;
1047 else {
1048 fprintf(stderr, Name
1049 ": %s is %s, cannot remove.\n",
1050 dnprintable,
1051 errno == EEXIST ? "still in use":
1052 "not a member");
1053 close(lfd);
1054 return 1;
1055 }
1056 }
1057 /* FIXME check that it is a current member */
1058 if (sysfd >= 0) {
1059 /* device has been removed and we don't know
1060 * the major:minor number
1061 */
1062 int n = write(sysfd, "remove", 6);
1063 if (n != 6)
1064 err = -1;
1065 else
1066 err = 0;
1067 close(sysfd);
1068 sysfd = -1;
1069 } else {
1070 err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
1071 if (err && errno == ENODEV) {
1072 /* Old kernels rejected this if no personality
1073 * registered */
1074 struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
1075 struct mdinfo *dv = NULL;
1076 if (sra)
1077 dv = sra->devs;
1078 for ( ; dv ; dv=dv->next)
1079 if (dv->disk.major == (int)major(stb.st_rdev) &&
1080 dv->disk.minor == (int)minor(stb.st_rdev))
1081 break;
1082 if (dv)
1083 err = sysfs_set_str(sra, dv,
1084 "state", "remove");
1085 else
1086 err = -1;
1087 if (sra)
1088 sysfs_free(sra);
1089 }
1090 }
1091 if (err) {
1092 fprintf(stderr, Name ": hot remove failed "
1093 "for %s: %s\n", dnprintable,
1094 strerror(errno));
1095 if (lfd >= 0)
1096 close(lfd);
1097 return 1;
1098 }
1099 if (tst->ss->external) {
1100 /*
1101 * Before dropping our exclusive open we make an
1102 * attempt at preventing mdmon from seeing an
1103 * 'add' event before reconciling this 'remove'
1104 * event.
1105 */
1106 char *name = devnum2devname(fd2devnum(fd));
1107
1108 if (!name) {
1109 fprintf(stderr, Name ": unable to get container name\n");
1110 return 1;
1111 }
1112
1113 ping_manager(name);
1114 free(name);
1115 }
1116 if (lfd >= 0)
1117 close(lfd);
1118 count++;
1119 if (verbose >= 0)
1120 fprintf(stderr, Name ": hot removed %s from %s\n",
1121 dnprintable, devname);
1122 break;
1123
1124 case 'f': /* set faulty */
1125 /* FIXME check current member */
1126 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1127 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1128 (unsigned long) stb.st_rdev))) {
1129 fprintf(stderr, Name ": set device faulty failed for %s: %s\n",
1130 dnprintable, strerror(errno));
1131 if (sysfd >= 0)
1132 close(sysfd);
1133 return 1;
1134 }
1135 if (sysfd >= 0)
1136 close(sysfd);
1137 sysfd = -1;
1138 count++;
1139 if (verbose >= 0)
1140 fprintf(stderr, Name ": set %s faulty in %s\n",
1141 dnprintable, devname);
1142 break;
1143 }
1144 }
1145 if (test && count == 0)
1146 return 2;
1147 return 0;
1148 }
1149
1150 int autodetect(void)
1151 {
1152 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1153 int rv = 1;
1154 int fd = dev_open("9:0", O_RDONLY);
1155 if (fd >= 0) {
1156 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1157 rv = 0;
1158 close(fd);
1159 }
1160 return rv;
1161 }
1162
1163 int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet)
1164 {
1165 struct supertype supertype, *st = &supertype;
1166 int fd, rv = 2;
1167
1168 memset(st, 0, sizeof(*st));
1169
1170 fd = open_subarray(dev, subarray, st, quiet);
1171 if (fd < 0)
1172 return 2;
1173
1174 if (!st->ss->update_subarray) {
1175 if (!quiet)
1176 fprintf(stderr,
1177 Name ": Operation not supported for %s metadata\n",
1178 st->ss->name);
1179 goto free_super;
1180 }
1181
1182 if (mdmon_running(st->devnum))
1183 st->update_tail = &st->updates;
1184
1185 rv = st->ss->update_subarray(st, subarray, update, ident);
1186
1187 if (rv) {
1188 if (!quiet)
1189 fprintf(stderr, Name ": Failed to update %s of subarray-%s in %s\n",
1190 update, subarray, dev);
1191 } else if (st->update_tail)
1192 flush_metadata_updates(st);
1193 else
1194 st->ss->sync_metadata(st);
1195
1196 if (rv == 0 && strcmp(update, "name") == 0 && !quiet)
1197 fprintf(stderr,
1198 Name ": Updated subarray-%s name from %s, UUIDs may have changed\n",
1199 subarray, dev);
1200
1201 free_super:
1202 st->ss->free_super(st);
1203 close(fd);
1204
1205 return rv;
1206 }
1207
1208 /* Move spare from one array to another
1209 * If adding to destination array fails
1210 * add back to original array
1211 * Returns 1 on success, 0 on failure */
1212 int move_spare(char *from_devname, char *to_devname, dev_t devid)
1213 {
1214 struct mddev_dev devlist;
1215 char devname[20];
1216
1217 /* try to remove and add */
1218 int fd1 = open(to_devname, O_RDONLY);
1219 int fd2 = open(from_devname, O_RDONLY);
1220
1221 if (fd1 < 0 || fd2 < 0) {
1222 if (fd1>=0) close(fd1);
1223 if (fd2>=0) close(fd2);
1224 return 0;
1225 }
1226
1227 devlist.next = NULL;
1228 devlist.used = 0;
1229 devlist.re_add = 0;
1230 devlist.writemostly = 0;
1231 devlist.devname = devname;
1232 sprintf(devname, "%d:%d", major(devid), minor(devid));
1233
1234 devlist.disposition = 'r';
1235 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
1236 devlist.disposition = 'a';
1237 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
1238 /* make sure manager is aware of changes */
1239 ping_manager(to_devname);
1240 ping_manager(from_devname);
1241 close(fd1);
1242 close(fd2);
1243 return 1;
1244 }
1245 else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
1246 }
1247 close(fd1);
1248 close(fd2);
1249 return 0;
1250 }
1251 #endif