]>
git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
30 #define REGISTER_DEV _IO (MD_MAJOR, 1)
31 #define START_MD _IO (MD_MAJOR, 2)
32 #define STOP_MD _IO (MD_MAJOR, 3)
34 int Manage_ro(char *devname
, int fd
, int readonly
)
36 /* switch to readonly or rw
39 * first check that array is runing
40 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
43 mdu_array_info_t array
;
48 if (md_get_version(fd
) < 9000) {
49 fprintf(stderr
, Name
": need md driver version 0.90.0 or later\n");
53 /* If this is an externally-manage array, we need to modify the
54 * metadata_version so that mdmon doesn't undo our change.
56 mdi
= sysfs_read(fd
, -1, GET_LEVEL
|GET_VERSION
);
58 mdi
->array
.major_version
== -1 &&
59 mdi
->array
.level
> 0 &&
60 is_subarray(mdi
->text_version
)) {
62 strcpy(vers
, "external:");
63 strcat(vers
, mdi
->text_version
);
66 /* We set readonly ourselves. */
68 sysfs_set_str(mdi
, NULL
, "metadata_version", vers
);
71 rv
= sysfs_set_str(mdi
, NULL
, "array_state", "readonly");
74 fprintf(stderr
, Name
": failed to set readonly for %s: %s\n",
75 devname
, strerror(errno
));
77 vers
[9] = mdi
->text_version
[0];
78 sysfs_set_str(mdi
, NULL
, "metadata_version", vers
);
83 /* We cannot set read/write - must signal mdmon */
85 sysfs_set_str(mdi
, NULL
, "metadata_version", vers
);
87 cp
= strchr(vers
+10, '/');
90 ping_monitor(vers
+10);
95 if (ioctl(fd
, GET_ARRAY_INFO
, &array
)) {
96 fprintf(stderr
, Name
": %s does not appear to be active.\n",
102 if (ioctl(fd
, STOP_ARRAY_RO
, NULL
)) {
103 fprintf(stderr
, Name
": failed to set readonly for %s: %s\n",
104 devname
, strerror(errno
));
107 } else if (readonly
< 0) {
108 if (ioctl(fd
, RESTART_ARRAY_RW
, NULL
)) {
109 fprintf(stderr
, Name
": failed to set writable for %s: %s\n",
110 devname
, strerror(errno
));
119 static void remove_devices(int devnum
, char *path
)
121 /* Remove all 'standard' devices for 'devnum', including
122 * partitions. Also remove names at 'path' - possibly with
123 * partition suffixes - which link to those names.
134 sprintf(base
, "/dev/md%d", devnum
);
136 sprintf(base
, "/dev/md_d%d", -1-devnum
);
137 be
= base
+ strlen(base
);
139 path2
= malloc(strlen(path
)+20);
141 pe
= path2
+ strlen(path2
);
145 for (part
= 0; part
< 16; part
++) {
147 sprintf(be
, "p%d", part
);
150 sprintf(pe
, "p%d", part
);
152 sprintf(pe
, "%d", part
);
155 /* FIXME test if really is md device ?? */
158 n
= readlink(path2
, link
, sizeof(link
));
159 if (n
&& strlen(base
) == n
&&
160 strncmp(link
, base
, n
) == 0)
168 int Manage_runstop(char *devname
, int fd
, int runstop
, int quiet
)
170 /* Run or stop the array. array must already be configured
172 * Only print failure messages if quiet == 0;
173 * quiet > 0 means really be quiet
174 * quiet < 0 means we will try again if it fails.
176 mdu_param_t param
; /* unused */
178 if (runstop
== -1 && md_get_version(fd
) < 9000) {
179 if (ioctl(fd
, STOP_MD
, 0)) {
180 if (quiet
== 0) fprintf(stderr
,
181 Name
": stopping device %s "
183 devname
, strerror(errno
));
188 if (md_get_version(fd
) < 9000) {
189 fprintf(stderr
, Name
": need md driver version 0.90.0 or later\n");
193 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
194 fprintf(stderr, Name ": %s does not appear to be active.\n",
200 if (ioctl(fd
, RUN_ARRAY
, ¶m
)) {
201 fprintf(stderr
, Name
": failed to run array %s: %s\n",
202 devname
, strerror(errno
));
206 fprintf(stderr
, Name
": started %s\n", devname
);
207 } else if (runstop
< 0){
208 struct map_ent
*map
= NULL
;
212 /* If this is an mdmon managed array, just write 'inactive'
213 * to the array state and let mdmon clear up.
215 devnum
= fd2devnum(fd
);
216 mdi
= sysfs_read(fd
, -1, GET_LEVEL
|GET_VERSION
);
218 mdi
->array
.level
> 0 &&
219 is_subarray(mdi
->text_version
)) {
220 /* This is mdmon managed. */
222 if (sysfs_set_str(mdi
, NULL
,
223 "array_state", "inactive") < 0) {
226 ": failed to stop array %s: %s\n",
227 devname
, strerror(errno
));
231 /* Give monitor a chance to act */
232 ping_monitor(mdi
->text_version
);
234 fd
= open(devname
, O_RDONLY
);
236 mdi
->array
.major_version
== -1 &&
237 mdi
->array
.minor_version
== -2 &&
238 !is_subarray(mdi
->text_version
)) {
239 struct mdstat_ent
*mds
, *m
;
240 /* container, possibly mdmon-managed.
241 * Make sure mdmon isn't opening it, which
242 * would interfere with the 'stop'
244 ping_monitor(mdi
->sys_name
);
246 /* now check that there are no existing arrays
247 * which are members of this array
249 mds
= mdstat_read(0, 0);
250 for (m
=mds
; m
; m
=m
->next
)
251 if (m
->metadata_version
&&
252 strncmp(m
->metadata_version
, "external:", 9)==0 &&
253 is_subarray(m
->metadata_version
+9) &&
254 devname2devnum(m
->metadata_version
+10) == devnum
) {
257 ": Cannot stop container %s: "
258 "member %s still active\n",
267 if (fd
>= 0 && ioctl(fd
, STOP_ARRAY
, NULL
)) {
270 ": failed to stop array %s: %s\n",
271 devname
, strerror(errno
));
273 fprintf(stderr
, "Perhaps a running "
274 "process, mounted filesystem "
275 "or active volume group?\n");
281 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
282 * was stopped, so We'll do it here just to be sure. Drop any
283 * partitions as well...
286 ioctl(fd
, BLKRRPART
, 0);
288 sysfs_uevent(mdi
, "change");
291 if (devnum
!= NoMdDev
&&
292 (stat("/dev/.udev", &stb
) != 0 ||
293 check_env("MDADM_NO_UDEV"))) {
294 struct map_ent
*mp
= map_by_devnum(&map
, devnum
);
295 remove_devices(devnum
, mp
? mp
->path
: NULL
);
300 fprintf(stderr
, Name
": stopped %s\n", devname
);
302 map_remove(&map
, devnum
);
308 int Manage_resize(char *devname
, int fd
, long long size
, int raid_disks
)
310 mdu_array_info_t info
;
311 if (ioctl(fd
, GET_ARRAY_INFO
, &info
) != 0) {
312 fprintf(stderr
, Name
": Cannot get array information for %s: %s\n",
313 devname
, strerror(errno
));
319 info
.raid_disks
= raid_disks
;
320 if (ioctl(fd
, SET_ARRAY_INFO
, &info
) != 0) {
321 fprintf(stderr
, Name
": Cannot set device size/shape for %s: %s\n",
322 devname
, strerror(errno
));
328 int Manage_subdevs(char *devname
, int fd
,
329 mddev_dev_t devlist
, int verbose
)
331 /* do something to each dev.
333 * 'a' - add the device
335 * If that fails EINVAL, try ADD_NEW_DISK
336 * 'r' - remove the device HOT_REMOVE_DISK
337 * device can be 'faulty' or 'detached' in which case all
338 * matching devices are removed.
339 * 'f' - set the device faulty SET_DISK_FAULTY
340 * device can be 'detached' in which case any device that
341 * is inaccessible will be marked faulty.
342 * For 'f' and 'r', the device can also be a kernel-internal
343 * name such as 'sdb'.
345 mdu_array_info_t array
;
346 mdu_disk_info_t disc
;
347 unsigned long long array_size
;
348 mddev_dev_t dv
, next
= NULL
;
352 struct supertype
*st
, *tst
;
358 if (ioctl(fd
, GET_ARRAY_INFO
, &array
)) {
359 fprintf(stderr
, Name
": cannot get array info for %s\n",
364 /* array.size is only 32 bit and may be truncated.
365 * So read from sysfs if possible, and record number of sectors
368 array_size
= get_component_size(fd
);
370 array_size
= array
.size
* 2;
372 tst
= super_by_fd(fd
);
374 fprintf(stderr
, Name
": unsupport array - version %d.%d\n",
375 array
.major_version
, array
.minor_version
);
379 for (dv
= devlist
, j
=0 ; dv
; dv
= next
, j
= jnext
) {
380 unsigned long long ldsize
;
382 char *dnprintable
= dv
->devname
;
388 if (strcmp(dv
->devname
, "failed")==0 ||
389 strcmp(dv
->devname
, "faulty")==0) {
390 if (dv
->disposition
!= 'r') {
391 fprintf(stderr
, Name
": %s only meaningful "
392 "with -r, not -%c\n",
393 dv
->devname
, dv
->disposition
);
396 for (; j
< array
.raid_disks
+ array
.nr_disks
; j
++) {
398 if (ioctl(fd
, GET_DISK_INFO
, &disc
))
400 if (disc
.major
== 0 && disc
.minor
== 0)
402 if ((disc
.state
& 1) == 0) /* faulty */
404 stb
.st_rdev
= makedev(disc
.major
, disc
.minor
);
407 sprintf(dvname
,"%d:%d", disc
.major
, disc
.minor
);
408 dnprintable
= dvname
;
413 } else if (strcmp(dv
->devname
, "detached") == 0) {
414 if (dv
->disposition
!= 'r' && dv
->disposition
!= 'f') {
415 fprintf(stderr
, Name
": %s only meaningful "
416 "with -r of -f, not -%c\n",
417 dv
->devname
, dv
->disposition
);
420 for (; j
< array
.raid_disks
+ array
.nr_disks
; j
++) {
423 if (ioctl(fd
, GET_DISK_INFO
, &disc
))
425 if (disc
.major
== 0 && disc
.minor
== 0)
427 sprintf(dvname
,"%d:%d", disc
.major
, disc
.minor
);
428 sfd
= dev_open(dvname
, O_RDONLY
);
433 if (dv
->disposition
== 'f' &&
434 (disc
.state
& 1) == 1) /* already faulty */
438 stb
.st_rdev
= makedev(disc
.major
, disc
.minor
);
441 dnprintable
= dvname
;
446 } else if (strchr(dv
->devname
, '/') == NULL
&&
447 strlen(dv
->devname
) < 50) {
448 /* Assume this is a kernel-internal name like 'sda1' */
451 if (dv
->disposition
!= 'r' && dv
->disposition
!= 'f') {
452 fprintf(stderr
, Name
": %s only meaningful "
453 "with -r of -f, not -%c\n",
454 dv
->devname
, dv
->disposition
);
458 sprintf(dname
, "dev-%s", dv
->devname
);
459 sysfd
= sysfs_open(fd2devnum(fd
), dname
, "block/dev");
463 if (sysfs_fd_get_str(sysfd
, dn
, 20) > 0 &&
464 sscanf(dn
, "%d:%d", &mj
,&mn
) == 2) {
465 stb
.st_rdev
= makedev(mj
,mn
);
472 sysfd
= sysfs_open(fd2devnum(fd
), dname
, "state");
474 fprintf(stderr
, Name
": %s does not appear "
475 "to be a component of %s\n",
476 dv
->devname
, devname
);
483 tfd
= dev_open(dv
->devname
, O_RDONLY
);
484 if (tfd
< 0 && dv
->disposition
== 'r' &&
485 lstat(dv
->devname
, &stb
) == 0)
486 /* Be happy, the lstat worked, that is
487 * enough for --remove
491 if (tfd
< 0 || fstat(tfd
, &stb
) != 0) {
492 fprintf(stderr
, Name
": cannot find %s: %s\n",
493 dv
->devname
, strerror(errno
));
500 if ((stb
.st_mode
& S_IFMT
) != S_IFBLK
) {
501 fprintf(stderr
, Name
": %s is not a "
507 switch(dv
->disposition
){
509 fprintf(stderr
, Name
": internal error - devmode[%s]=%d\n",
510 dv
->devname
, dv
->disposition
);
514 if (tst
->subarray
[0]) {
515 fprintf(stderr
, Name
": Cannot add disks to a"
516 " \'member\' array, perform this"
517 " operation on the parent container\n");
520 /* Make sure it isn't in use (in 2.6 or later) */
521 tfd
= dev_open(dv
->devname
, O_RDONLY
|O_EXCL
|O_DIRECT
);
523 fprintf(stderr
, Name
": Cannot open %s: %s\n",
524 dv
->devname
, strerror(errno
));
527 remove_partitions(tfd
);
531 if (array
.not_persistent
==0)
532 st
->ss
->load_super(st
, tfd
, NULL
);
534 if (!get_dev_size(tfd
, dv
->devname
, &ldsize
)) {
541 if (!tst
->ss
->external
&&
542 array
.major_version
== 0 &&
543 md_get_version(fd
)%100 < 2) {
544 if (ioctl(fd
, HOT_ADD_DISK
,
545 (unsigned long)stb
.st_rdev
)==0) {
547 fprintf(stderr
, Name
": hot added %s\n",
552 fprintf(stderr
, Name
": hot add failed for %s: %s\n",
553 dv
->devname
, strerror(errno
));
557 if (array
.not_persistent
== 0 || tst
->ss
->external
) {
559 /* need to find a sample superblock to copy, and
560 * a spare slot to use.
561 * For 'external' array (well, container based),
562 * We can just load the metadata for the array.
564 if (tst
->ss
->external
) {
565 tst
->ss
->load_super(tst
, fd
, NULL
);
566 } else for (j
= 0; j
< tst
->max_devs
; j
++) {
570 if (ioctl(fd
, GET_DISK_INFO
, &disc
))
572 if (disc
.major
==0 && disc
.minor
==0)
574 if ((disc
.state
& 4)==0) continue; /* sync */
575 /* Looks like a good device to try */
576 dev
= map_dev(disc
.major
, disc
.minor
, 1);
578 dfd
= dev_open(dev
, O_RDONLY
);
579 if (dfd
< 0) continue;
580 if (tst
->ss
->load_super(tst
, dfd
,
588 /* FIXME this is a bad test to be using */
590 fprintf(stderr
, Name
": cannot find valid superblock in this array - HELP\n");
594 /* Make sure device is large enough */
595 if (tst
->ss
->avail_size(tst
, ldsize
/512) <
597 fprintf(stderr
, Name
": %s not large enough to join array\n",
602 /* Possibly this device was recently part of the array
603 * and was temporarily removed, and is now being re-added.
604 * If so, we can simply re-add it.
606 tst
->ss
->uuid_from_super(tst
, duuid
);
608 /* re-add doesn't work for version-1 superblocks
611 if (array
.major_version
== 1 &&
612 get_linux_version() <= 2006018)
615 st
->ss
->uuid_from_super(st
, ouuid
);
616 if (memcmp(duuid
, ouuid
, sizeof(ouuid
))==0) {
617 /* looks close enough for now. Kernel
618 * will worry about whether a bitmap
619 * based reconstruction is possible.
622 st
->ss
->getinfo_super(st
, &mdi
);
623 disc
.major
= major(stb
.st_rdev
);
624 disc
.minor
= minor(stb
.st_rdev
);
625 disc
.number
= mdi
.disk
.number
;
626 disc
.raid_disk
= mdi
.disk
.raid_disk
;
627 disc
.state
= mdi
.disk
.state
;
628 if (dv
->writemostly
== 1)
629 disc
.state
|= 1 << MD_DISK_WRITEMOSTLY
;
630 if (dv
->writemostly
== 2)
631 disc
.state
&= ~(1 << MD_DISK_WRITEMOSTLY
);
632 /* don't even try if disk is marked as faulty */
634 if ((disc
.state
& 1) == 0 &&
635 ioctl(fd
, ADD_NEW_DISK
, &disc
) == 0) {
637 fprintf(stderr
, Name
": re-added %s\n", dv
->devname
);
640 if (errno
== ENOMEM
|| errno
== EROFS
) {
641 fprintf(stderr
, Name
": add new device failed for %s: %s\n",
642 dv
->devname
, strerror(errno
));
645 /* fall back on normal-add */
650 ": --re-add for %s to %s is not possible\n",
651 dv
->devname
, devname
);
655 /* non-persistent. Must ensure that new drive
656 * is at least array.size big.
658 if (ldsize
/512 < array_size
) {
659 fprintf(stderr
, Name
": %s not large enough to join array\n",
664 /* in 2.6.17 and earlier, version-1 superblocks won't
665 * use the number we write, but will choose a free number.
666 * we must choose the same free number, which requires
667 * starting at 'raid_disks' and counting up
669 for (j
= array
.raid_disks
; j
< tst
->max_devs
; j
++) {
671 if (ioctl(fd
, GET_DISK_INFO
, &disc
))
673 if (disc
.major
==0 && disc
.minor
==0)
675 if (disc
.state
& 8) /* removed */
678 disc
.major
= major(stb
.st_rdev
);
679 disc
.minor
= minor(stb
.st_rdev
);
682 if (array
.not_persistent
==0 || tst
->ss
->external
) {
684 if (dv
->writemostly
== 1)
685 disc
.state
|= 1 << MD_DISK_WRITEMOSTLY
;
686 dfd
= dev_open(dv
->devname
, O_RDWR
| O_EXCL
|O_DIRECT
);
687 if (tst
->ss
->add_to_super(tst
, &disc
, dfd
,
692 /* write_init_super will close 'dfd' */
693 if (tst
->ss
->external
)
694 /* mdmon will write the metadata */
696 else if (tst
->ss
->write_init_super(tst
))
698 } else if (dv
->re_add
) {
699 /* this had better be raid1.
700 * As we are "--re-add"ing we must find a spare slot
703 char *used
= malloc(array
.raid_disks
);
704 memset(used
, 0, array
.raid_disks
);
705 for (j
=0; j
< tst
->max_devs
; j
++) {
706 mdu_disk_info_t disc2
;
708 if (ioctl(fd
, GET_DISK_INFO
, &disc2
))
710 if (disc2
.major
==0 && disc2
.minor
==0)
712 if (disc2
.state
& 8) /* removed */
714 if (disc2
.raid_disk
< 0)
716 if (disc2
.raid_disk
> array
.raid_disks
)
718 used
[disc2
.raid_disk
] = 1;
720 for (j
=0 ; j
<array
.raid_disks
; j
++)
723 disc
.state
|= (1<<MD_DISK_SYNC
);
728 if (dv
->writemostly
== 1)
729 disc
.state
|= (1 << MD_DISK_WRITEMOSTLY
);
730 if (tst
->ss
->external
) {
731 /* add a disk to an external metadata container
732 * only if mdmon is around to see it
734 struct mdinfo new_mdi
;
737 int devnum
= fd2devnum(fd
);
739 container_fd
= open_dev_excl(devnum
);
740 if (container_fd
< 0) {
741 fprintf(stderr
, Name
": add failed for %s:"
742 " could not get exclusive access to container\n",
747 if (!mdmon_running(devnum
)) {
748 fprintf(stderr
, Name
": add failed for %s: mdmon not running\n",
754 sra
= sysfs_read(container_fd
, -1, 0);
756 fprintf(stderr
, Name
": add failed for %s: sysfs_read failed\n",
761 sra
->array
.level
= LEVEL_CONTAINER
;
762 /* Need to set data_offset and component_size */
763 tst
->ss
->getinfo_super(tst
, &new_mdi
);
764 new_mdi
.disk
.major
= disc
.major
;
765 new_mdi
.disk
.minor
= disc
.minor
;
766 new_mdi
.recovery_start
= 0;
767 if (sysfs_add_disk(sra
, &new_mdi
, 0) != 0) {
768 fprintf(stderr
, Name
": add new device to external metadata"
769 " failed for %s\n", dv
->devname
);
773 ping_monitor(devnum2devname(devnum
));
776 } else if (ioctl(fd
, ADD_NEW_DISK
, &disc
)) {
777 fprintf(stderr
, Name
": add new device failed for %s as %d: %s\n",
778 dv
->devname
, j
, strerror(errno
));
782 fprintf(stderr
, Name
": added %s\n", dv
->devname
);
787 if (tst
->subarray
[0]) {
788 fprintf(stderr
, Name
": Cannot remove disks from a"
789 " \'member\' array, perform this"
790 " operation on the parent container\n");
795 if (tst
->ss
->external
) {
796 /* To remove a device from a container, we must
797 * check that it isn't in use in an array.
798 * This involves looking in the 'holders'
799 * directory - there must be just one entry,
801 * To ensure that it doesn't get used as a
802 * hold spare while we are checking, we
803 * get an O_EXCL open on the container
805 int dnum
= fd2devnum(fd
);
806 lfd
= open_dev_excl(dnum
);
809 ": Cannot get exclusive access "
810 " to container - odd\n");
815 /* in the detached case it is not possible to
816 * check if we are the unique holder, so just
817 * rely on the 'detached' checks
819 if (strcmp(dv
->devname
, "detached") == 0 ||
821 sysfs_unique_holder(dnum
, stb
.st_rdev
))
825 ": %s is %s, cannot remove.\n",
827 errno
== EEXIST
? "still in use":
833 /* FIXME check that it is a current member */
835 /* device has been removed and we don't know
836 * the major:minor number
838 int n
= write(sysfd
, "remove", 6);
846 err
= ioctl(fd
, HOT_REMOVE_DISK
, (unsigned long)stb
.st_rdev
);
847 if (err
&& errno
== ENODEV
) {
848 /* Old kernels rejected this if no personality
850 struct mdinfo
*sra
= sysfs_read(fd
, 0, GET_DEVS
);
851 struct mdinfo
*dv
= NULL
;
854 for ( ; dv
; dv
=dv
->next
)
855 if (dv
->disk
.major
== major(stb
.st_rdev
) &&
856 dv
->disk
.minor
== minor(stb
.st_rdev
))
859 err
= sysfs_set_str(sra
, dv
,
868 fprintf(stderr
, Name
": hot remove failed "
869 "for %s: %s\n", dnprintable
,
875 if (tst
->ss
->external
) {
877 * Before dropping our exclusive open we make an
878 * attempt at preventing mdmon from seeing an
879 * 'add' event before reconciling this 'remove'
882 char *name
= devnum2devname(fd2devnum(fd
));
885 fprintf(stderr
, Name
": unable to get container name\n");
895 fprintf(stderr
, Name
": hot removed %s\n",
899 case 'f': /* set faulty */
900 /* FIXME check current member */
901 if ((sysfd
>= 0 && write(sysfd
, "faulty", 6) != 6) ||
902 (sysfd
< 0 && ioctl(fd
, SET_DISK_FAULTY
,
903 (unsigned long) stb
.st_rdev
))) {
904 fprintf(stderr
, Name
": set device faulty failed for %s: %s\n",
905 dnprintable
, strerror(errno
));
914 fprintf(stderr
, Name
": set %s faulty in %s\n",
915 dnprintable
, devname
);
925 /* Open any md device, and issue the RAID_AUTORUN ioctl */
927 int fd
= dev_open("9:0", O_RDONLY
);
929 if (ioctl(fd
, RAID_AUTORUN
, 0) == 0)