2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
35 int devnum
; /* to sync with mdstat info */
39 int active
, working
, failed
, spare
, raid
;
41 int devstate
[MAX_DISKS
];
42 dev_t devid
[MAX_DISKS
];
44 int parent_dev
; /* For subarray, devnum of parent.
47 struct supertype
*metadata
;
48 struct state
*subarray
;/* for a container it is a link to first subarray
49 * for a subarray it is a link to next subarray
50 * in the same container */
51 struct state
*parent
; /* for a subarray it is a link to its container
62 static int make_daemon(char *pidfile
);
63 static int check_one_sharer(int scan
);
64 static void alert(char *event
, char *dev
, char *disc
, struct alert_info
*info
);
65 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
,
66 int test
, struct alert_info
*info
,
67 int increments
, char *prefer
);
68 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
,
69 int test
, struct alert_info
*info
);
70 static void try_spare_migration(struct state
*statelist
, struct alert_info
*info
);
71 static void link_containers_with_subarrays(struct state
*list
);
73 int Monitor(struct mddev_dev
*devlist
,
74 char *mailaddr
, char *alert_cmd
,
75 int period
, int daemonise
, int scan
, int oneshot
,
76 int dosyslog
, int test
, char *pidfile
, int increments
,
77 int share
, char *prefer
)
80 * Every few seconds, scan every md device looking for changes
81 * When a change is found, log it, possibly run the alert command,
82 * and possibly send Email
84 * For each array, we record:
86 * active/working/failed/spare drives
87 * State of each device.
88 * %rebuilt if rebuilding
90 * If the update time changes, check out all the data again
91 * It is possible that we cannot get the state of each device
92 * due to bugs in the md kernel module.
93 * We also read /proc/mdstat to get rebuild percent,
94 * and to get state on all active devices in case of kernel bug.
98 * An active device had Faulty set or Active/Sync removed
100 * A spare device had Faulty set
102 * An active device had a reverse transition
104 * percent went from -1 to +ve
106 * percent went from below to not-below NN%
108 * Couldn't access a device which was previously visible
110 * if we detect an array with active<raid and spare==0
111 * we look at other arrays that have same spare-group
112 * If we find one with active==raid and spare>0,
113 * and if we can get_disk_info and find a name
114 * Then we hot-remove and hot-add to the other array
116 * If devlist is NULL, then we can monitor everything because --scan
117 * was given. We get an initial list from config file and add anything
118 * that appears in /proc/mdstat
121 struct state
*statelist
= NULL
;
124 struct mdstat_ent
*mdstat
= NULL
;
125 char *mailfrom
= NULL
;
126 struct alert_info info
;
129 mailaddr
= conf_get_mailaddr();
130 if (mailaddr
&& ! scan
)
131 fprintf(stderr
, Name
": Monitor using email address \"%s\" from config file\n",
134 mailfrom
= conf_get_mailfrom();
137 alert_cmd
= conf_get_program();
138 if (alert_cmd
&& ! scan
)
139 fprintf(stderr
, Name
": Monitor using program \"%s\" from config file\n",
142 if (scan
&& !mailaddr
&& !alert_cmd
&& !dosyslog
) {
143 fprintf(stderr
, Name
": No mail address or alert command - not monitoring.\n");
146 info
.alert_cmd
= alert_cmd
;
147 info
.mailaddr
= mailaddr
;
148 info
.mailfrom
= mailfrom
;
149 info
.dosyslog
= dosyslog
;
152 int rv
= make_daemon(pidfile
);
158 if (check_one_sharer(scan
))
161 if (devlist
== NULL
) {
162 struct mddev_ident
*mdlist
= conf_get_ident(NULL
);
163 for (; mdlist
; mdlist
=mdlist
->next
) {
165 if (mdlist
->devname
== NULL
)
167 if (strcasecmp(mdlist
->devname
, "<ignore>") == 0)
169 st
= calloc(1, sizeof *st
);
172 if (mdlist
->devname
[0] == '/')
173 st
->devname
= strdup(mdlist
->devname
);
175 st
->devname
= malloc(8+strlen(mdlist
->devname
)+1);
176 strcpy(strcpy(st
->devname
, "/dev/md/"),
179 st
->next
= statelist
;
180 st
->devnum
= INT_MAX
;
181 st
->percent
= RESYNC_UNKNOWN
;
182 st
->expected_spares
= mdlist
->spare_disks
;
183 if (mdlist
->spare_group
)
184 st
->spare_group
= strdup(mdlist
->spare_group
);
188 struct mddev_dev
*dv
;
189 for (dv
=devlist
; dv
; dv
=dv
->next
) {
190 struct mddev_ident
*mdlist
= conf_get_ident(dv
->devname
);
191 struct state
*st
= calloc(1, sizeof *st
);
194 st
->devname
= strdup(dv
->devname
);
195 st
->next
= statelist
;
196 st
->devnum
= INT_MAX
;
197 st
->percent
= RESYNC_UNKNOWN
;
198 st
->expected_spares
= -1;
200 st
->expected_spares
= mdlist
->spare_disks
;
201 if (mdlist
->spare_group
)
202 st
->spare_group
= strdup(mdlist
->spare_group
);
216 mdstat
= mdstat_read(oneshot
?0:1, 0);
218 for (st
=statelist
; st
; st
=st
->next
)
219 if (check_array(st
, mdstat
, test
, &info
,
223 /* now check if there are any new devices found in mdstat */
225 new_found
= add_new_arrays(mdstat
, &statelist
, test
,
228 /* If an array has active < raid && spare == 0 && spare_group != NULL
229 * Look for another array with spare > 0 and active == raid and same spare_group
230 * if found, choose a device and hotremove/hotadd
232 if (share
&& anydegraded
)
233 try_spare_migration(statelist
, &info
);
242 for (st2
= statelist
; st2
; st2
= statelist
) {
243 statelist
= st2
->next
;
252 static int make_daemon(char *pidfile
)
255 * -1 in the forked daemon
258 * so a non-negative value becomes the exit code.
266 pid_file
=fopen(pidfile
, "w");
268 perror("cannot create pid file");
270 fprintf(pid_file
,"%d\n", pid
);
281 open("/dev/null", O_RDWR
);
288 static int check_one_sharer(int scan
)
295 sprintf(path
, "%s/autorebuild.pid", MDMON_DIR
);
296 fp
= fopen(path
, "r");
298 if (fscanf(fp
, "%d", &pid
) != 1)
300 sprintf(dir
, "/proc/%d", pid
);
301 rv
= stat(dir
, &buf
);
304 fprintf(stderr
, Name
": Only one "
305 "autorebuild process allowed"
306 " in scan mode, aborting\n");
310 fprintf(stderr
, Name
": Warning: One"
311 " autorebuild process already"
318 if (mkdir(MDMON_DIR
, S_IRWXU
) < 0 &&
320 fprintf(stderr
, Name
": Can't create "
321 "autorebuild.pid file\n");
323 fp
= fopen(path
, "w");
325 fprintf(stderr
, Name
": Cannot create"
330 fprintf(fp
, "%d\n", pid
);
338 static void alert(char *event
, char *dev
, char *disc
, struct alert_info
*info
)
342 if (!info
->alert_cmd
&& !info
->mailaddr
&& !info
->dosyslog
) {
343 time_t now
= time(0);
345 printf("%1.15s: %s on %s %s\n", ctime(&now
)+4, event
, dev
, disc
?disc
:"unknown device");
347 if (info
->alert_cmd
) {
351 waitpid(pid
, NULL
, 0);
356 execl(info
->alert_cmd
, info
->alert_cmd
,
357 event
, dev
, disc
, NULL
);
361 if (info
->mailaddr
&&
362 (strncmp(event
, "Fail", 4)==0 ||
363 strncmp(event
, "Test", 4)==0 ||
364 strncmp(event
, "Spares", 6)==0 ||
365 strncmp(event
, "Degrade", 7)==0)) {
366 FILE *mp
= popen(Sendmail
, "w");
370 gethostname(hname
, sizeof(hname
));
371 signal(SIGPIPE
, SIG_IGN
);
373 fprintf(mp
, "From: %s\n", info
->mailfrom
);
375 fprintf(mp
, "From: " Name
" monitoring <root>\n");
376 fprintf(mp
, "To: %s\n", info
->mailaddr
);
377 fprintf(mp
, "Subject: %s event on %s:%s\n\n",
381 "This is an automatically generated"
382 " mail message from " Name
"\n");
383 fprintf(mp
, "running on %s\n\n", hname
);
386 "A %s event had been detected on"
387 " md device %s.\n\n", event
, dev
);
389 if (disc
&& disc
[0] != ' ')
391 "It could be related to"
392 " component device %s.\n\n", disc
);
393 if (disc
&& disc
[0] == ' ')
394 fprintf(mp
, "Extra information:%s.\n\n", disc
);
396 fprintf(mp
, "Faithfully yours, etc.\n");
398 mdstat
= fopen("/proc/mdstat", "r");
403 "\nP.S. The /proc/mdstat file"
404 " currently contains the following:\n\n");
405 while ( (n
=fread(buf
, 1, sizeof(buf
), mdstat
)) > 0)
406 n
=fwrite(buf
, 1, n
, mp
);
413 /* log the event to syslog maybe */
414 if (info
->dosyslog
) {
415 /* Log at a different severity depending on the event.
417 * These are the critical events: */
418 if (strncmp(event
, "Fail", 4)==0 ||
419 strncmp(event
, "Degrade", 7)==0 ||
420 strncmp(event
, "DeviceDisappeared", 17)==0)
422 /* Good to know about, but are not failures: */
423 else if (strncmp(event
, "Rebuild", 7)==0 ||
424 strncmp(event
, "MoveSpare", 9)==0 ||
425 strncmp(event
, "Spares", 6) != 0)
426 priority
= LOG_WARNING
;
427 /* Everything else: */
433 "%s event detected on md device %s,"
434 " component device %s", event
, dev
, disc
);
437 "%s event detected on md device %s",
442 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
,
443 int test
, struct alert_info
*ainfo
,
444 int increments
, char *prefer
)
446 /* Update the state 'st' to reflect any changes shown in mdstat,
447 * or found by directly examining the array, and return
448 * '1' if the array is degraded, or '0' if it is optimal (or dead).
450 struct { int state
, major
, minor
; } info
[MAX_DISKS
];
451 mdu_array_info_t array
;
452 struct mdstat_ent
*mse
= NULL
, *mse2
;
453 char *dev
= st
->devname
;
461 alert("TestMessage", dev
, NULL
, ainfo
);
462 fd
= open(dev
, O_RDONLY
);
465 alert("DeviceDisappeared", dev
, NULL
, ainfo
);
469 fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
470 if (ioctl(fd
, GET_ARRAY_INFO
, &array
)<0) {
472 alert("DeviceDisappeared", dev
, NULL
, ainfo
);
477 /* It's much easier to list what array levels can't
478 * have a device disappear than all of them that can
480 if (array
.level
== 0 || array
.level
== -1) {
482 alert("DeviceDisappeared", dev
, "Wrong-Level", ainfo
);
487 if (st
->devnum
== INT_MAX
) {
489 if (fstat(fd
, &stb
) == 0 &&
490 (S_IFMT
&stb
.st_mode
)==S_IFBLK
) {
491 if (major(stb
.st_rdev
) == MD_MAJOR
)
492 st
->devnum
= minor(stb
.st_rdev
);
494 st
->devnum
= -1- (minor(stb
.st_rdev
)>>6);
498 for (mse2
= mdstat
; mse2
; mse2
=mse2
->next
)
499 if (mse2
->devnum
== st
->devnum
) {
500 mse2
->devnum
= INT_MAX
; /* flag it as "used" */
505 /* duplicated array in statelist
506 * or re-created after reading mdstat*/
511 /* this array is in /proc/mdstat */
512 if (array
.utime
== 0)
513 /* external arrays don't update utime, so
514 * just make sure it is always different. */
515 array
.utime
= st
->utime
+ 1;;
518 /* New array appeared where previously there was an error */
520 st
->percent
= RESYNC_NONE
;
522 alert("NewArray", st
->devname
, NULL
, ainfo
);
525 if (st
->utime
== array
.utime
&&
526 st
->failed
== array
.failed_disks
&&
527 st
->working
== array
.working_disks
&&
528 st
->spare
== array
.spare_disks
&&
530 mse
->percent
== st
->percent
533 if ((st
->active
< st
->raid
) && st
->spare
== 0)
538 if (st
->utime
== 0 && /* new array */
539 mse
->pattern
&& strchr(mse
->pattern
, '_') /* degraded */
541 alert("DegradedArray", dev
, NULL
, ainfo
);
543 if (st
->utime
== 0 && /* new array */
544 st
->expected_spares
> 0 &&
545 array
.spare_disks
< st
->expected_spares
)
546 alert("SparesMissing", dev
, NULL
, ainfo
);
547 if (st
->percent
< 0 && st
->percent
!= RESYNC_UNKNOWN
&&
549 alert("RebuildStarted", dev
, NULL
, ainfo
);
550 if (st
->percent
>= 0 &&
552 (mse
->percent
/ increments
) > (st
->percent
/ increments
)) {
553 char percentalert
[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
555 if((mse
->percent
/ increments
) == 0)
556 snprintf(percentalert
, sizeof(percentalert
), "RebuildStarted");
558 snprintf(percentalert
, sizeof(percentalert
), "Rebuild%02d", mse
->percent
);
560 alert(percentalert
, dev
, NULL
, ainfo
);
563 if (mse
->percent
== RESYNC_NONE
&&
565 /* Rebuild/sync/whatever just finished.
566 * If there is a number in /mismatch_cnt,
567 * we should report that.
570 sysfs_read(-1, st
->devnum
, GET_MISMATCH
);
571 if (sra
&& sra
->mismatch_cnt
> 0) {
573 snprintf(cnt
, sizeof(cnt
),
574 " mismatches found: %d (on raid level %d)",
575 sra
->mismatch_cnt
, array
.level
);
576 alert("RebuildFinished", dev
, cnt
, ainfo
);
578 alert("RebuildFinished", dev
, NULL
, ainfo
);
582 st
->percent
= mse
->percent
;
584 remaining_disks
= array
.nr_disks
;
585 for (i
=0; i
<MAX_DISKS
&& remaining_disks
> 0;
587 mdu_disk_info_t disc
;
589 if (ioctl(fd
, GET_DISK_INFO
, &disc
) >= 0) {
590 info
[i
].state
= disc
.state
;
591 info
[i
].major
= disc
.major
;
592 info
[i
].minor
= disc
.minor
;
593 if (disc
.major
|| disc
.minor
)
596 info
[i
].major
= info
[i
].minor
= 0;
600 if (mse
->metadata_version
&&
601 strncmp(mse
->metadata_version
, "external:", 9) == 0 &&
602 is_subarray(mse
->metadata_version
+9))
604 devname2devnum(mse
->metadata_version
+10);
606 st
->parent_dev
= NoMdDev
;
607 if (st
->metadata
== NULL
&&
608 st
->parent_dev
== NoMdDev
)
609 st
->metadata
= super_by_fd(fd
, NULL
);
613 for (i
=0; i
<MAX_DISKS
; i
++) {
614 mdu_disk_info_t disc
= {0,0,0,0,0};
620 (info
[i
].major
|| info
[i
].minor
)) {
621 newstate
= info
[i
].state
;
622 dv
= map_dev_preferred(
623 info
[i
].major
, info
[i
].minor
, 1,
625 disc
.state
= newstate
;
626 disc
.major
= info
[i
].major
;
627 disc
.minor
= info
[i
].minor
;
629 newstate
= (1 << MD_DISK_REMOVED
);
631 if (dv
== NULL
&& st
->devid
[i
])
632 dv
= map_dev_preferred(
634 minor(st
->devid
[i
]), 1, prefer
);
635 change
= newstate
^ st
->devstate
[i
];
636 if (st
->utime
&& change
&& !st
->err
&& !new_array
) {
637 if ((st
->devstate
[i
]&change
)&(1<<MD_DISK_SYNC
))
638 alert("Fail", dev
, dv
, ainfo
);
639 else if ((newstate
& (1<<MD_DISK_FAULTY
)) &&
640 (disc
.major
|| disc
.minor
) &&
641 st
->devid
[i
] == makedev(disc
.major
, disc
.minor
))
642 alert("FailSpare", dev
, dv
, ainfo
);
643 else if ((newstate
&change
)&(1<<MD_DISK_SYNC
))
644 alert("SpareActive", dev
, dv
, ainfo
);
646 st
->devstate
[i
] = newstate
;
647 st
->devid
[i
] = makedev(disc
.major
, disc
.minor
);
649 st
->active
= array
.active_disks
;
650 st
->working
= array
.working_disks
;
651 st
->spare
= array
.spare_disks
;
652 st
->failed
= array
.failed_disks
;
653 st
->utime
= array
.utime
;
654 st
->raid
= array
.raid_disks
;
656 if ((st
->active
< st
->raid
) && st
->spare
== 0)
661 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
,
662 int test
, struct alert_info
*info
)
664 struct mdstat_ent
*mse
;
667 for (mse
=mdstat
; mse
; mse
=mse
->next
)
668 if (mse
->devnum
!= INT_MAX
&&
669 (!mse
->level
|| /* retrieve containers */
670 (strcmp(mse
->level
, "raid0") != 0 &&
671 strcmp(mse
->level
, "linear") != 0))
673 struct state
*st
= calloc(1, sizeof *st
);
674 mdu_array_info_t array
;
678 st
->devname
= strdup(get_md_name(mse
->devnum
));
679 if ((fd
= open(st
->devname
, O_RDONLY
)) < 0 ||
680 ioctl(fd
, GET_ARRAY_INFO
, &array
)< 0) {
682 if (fd
>=0) close(fd
);
683 put_md_name(st
->devname
);
686 st
->metadata
->ss
->free_super(st
->metadata
);
693 st
->next
= *statelist
;
695 st
->devnum
= mse
->devnum
;
696 st
->percent
= RESYNC_UNKNOWN
;
697 st
->expected_spares
= -1;
698 if (mse
->metadata_version
&&
699 strncmp(mse
->metadata_version
, "external:", 9) == 0 &&
700 is_subarray(mse
->metadata_version
+9))
702 devname2devnum(mse
->metadata_version
+10);
704 st
->parent_dev
= NoMdDev
;
707 alert("TestMessage", st
->devname
, NULL
, info
);
713 static int get_min_spare_size_required(struct state
*st
, unsigned long long *sizep
)
718 !st
->metadata
->ss
->min_acceptable_spare_size
) {
723 fd
= open(st
->devname
, O_RDONLY
);
726 if (st
->metadata
->ss
->external
)
727 st
->metadata
->ss
->load_container(st
->metadata
, fd
, st
->devname
);
729 st
->metadata
->ss
->load_super(st
->metadata
, fd
, st
->devname
);
731 if (!st
->metadata
->sb
)
733 *sizep
= st
->metadata
->ss
->min_acceptable_spare_size(st
->metadata
);
734 st
->metadata
->ss
->free_super(st
->metadata
);
739 static int check_donor(struct state
*from
, struct state
*to
)
746 /* Cannot move from a member */
750 for (sub
= from
->subarray
; sub
; sub
= sub
->subarray
)
751 /* If source array has degraded subarrays, don't
754 if (sub
->active
< sub
->raid
)
756 if (from
->metadata
->ss
->external
== 0)
757 if (from
->active
< from
->raid
)
759 if (from
->spare
<= 0)
764 static dev_t
choose_spare(struct state
*from
, struct state
*to
,
765 struct domainlist
*domlist
, unsigned long long min_size
)
770 for (d
= from
->raid
; !dev
&& d
< MAX_DISKS
; d
++) {
771 if (from
->devid
[d
] > 0 &&
772 from
->devstate
[d
] == 0) {
773 struct dev_policy
*pol
;
774 unsigned long long dev_size
;
776 if (to
->metadata
->ss
->external
&&
777 test_partition_from_id(from
->devid
[d
]))
781 dev_size_from_id(from
->devid
[d
], &dev_size
) &&
785 pol
= devnum_policy(from
->devid
[d
]);
786 if (from
->spare_group
)
787 pol_add(&pol
, pol_domain
,
788 from
->spare_group
, NULL
);
789 if (domain_test(domlist
, pol
, to
->metadata
->ss
->name
) == 1)
790 dev
= from
->devid
[d
];
791 dev_policy_free(pol
);
797 static dev_t
container_choose_spare(struct state
*from
, struct state
*to
,
798 struct domainlist
*domlist
,
799 unsigned long long min_size
, int active
)
801 /* This is similar to choose_spare, but we cannot trust devstate,
802 * so we need to read the metadata instead
805 struct supertype
*st
= from
->metadata
;
806 int fd
= open(from
->devname
, O_RDONLY
);
812 if (!st
->ss
->getinfo_super_disks
) {
817 err
= st
->ss
->load_container(st
, fd
, NULL
);
823 /* We must check if number of active disks has not increased
824 * since ioctl in main loop. mdmon may have added spare
825 * to subarray. If so we do not need to look for more spares
826 * so return non zero value */
829 list
= st
->ss
->getinfo_super_disks(st
);
831 st
->ss
->free_super(st
);
836 if (dp
->disk
.state
& (1<<MD_DISK_SYNC
) &&
837 !(dp
->disk
.state
& (1<<MD_DISK_FAULTY
)))
842 if (active
< active_cnt
) {
843 /* Spare just activated.*/
844 st
->ss
->free_super(st
);
849 /* We only need one spare so full list not needed */
850 list
= container_choose_spares(st
, min_size
, domlist
, from
->spare_group
,
851 to
->metadata
->ss
->name
, 1);
853 struct mdinfo
*disks
= list
->devs
;
855 dev
= makedev(disks
->disk
.major
, disks
->disk
.minor
);
858 st
->ss
->free_super(st
);
863 static void try_spare_migration(struct state
*statelist
, struct alert_info
*info
)
868 link_containers_with_subarrays(statelist
);
869 for (st
= statelist
; st
; st
= st
->next
)
870 if (st
->active
< st
->raid
&&
871 st
->spare
== 0 && !st
->err
) {
872 struct domainlist
*domlist
= NULL
;
874 struct state
*to
= st
;
875 unsigned long long min_size
;
877 if (to
->parent_dev
!= NoMdDev
&& !to
->parent
)
878 /* subarray monitored without parent container
879 * we can't move spares here */
883 /* member of a container */
886 if (get_min_spare_size_required(to
, &min_size
))
888 if (to
->metadata
->ss
->external
) {
889 /* We must make sure there is
890 * no suitable spare in container already.
891 * If there is we don't add more */
892 dev_t devid
= container_choose_spare(
893 to
, to
, NULL
, min_size
, st
->active
);
897 for (d
= 0; d
< MAX_DISKS
; d
++)
899 domainlist_add_dev(&domlist
,
901 to
->metadata
->ss
->name
);
903 domain_add(&domlist
, to
->spare_group
);
905 * No spare migration if the destination
906 * has no domain. Skip this array.
910 for (from
=statelist
; from
; from
=from
->next
) {
912 if (!check_donor(from
, to
))
914 if (from
->metadata
->ss
->external
)
915 devid
= container_choose_spare(
916 from
, to
, domlist
, min_size
, 0);
918 devid
= choose_spare(from
, to
, domlist
,
921 && move_spare(from
->devname
, to
->devname
, devid
)) {
922 alert("MoveSpare", to
->devname
, from
->devname
, info
);
926 domain_free(domlist
);
930 /* search the statelist to connect external
931 * metadata subarrays with their containers
932 * We always completely rebuild the tree from scratch as
933 * that is safest considering the possibility of entries
934 * disappearing or changing.
936 static void link_containers_with_subarrays(struct state
*list
)
940 for (st
= list
; st
; st
= st
->next
) {
944 for (st
= list
; st
; st
= st
->next
)
945 if (st
->parent_dev
!= NoMdDev
)
946 for (cont
= list
; cont
; cont
= cont
->next
)
948 cont
->parent_dev
== NoMdDev
&&
949 cont
->devnum
== st
->parent_dev
) {
951 st
->subarray
= cont
->subarray
;
957 /* Not really Monitor but ... */
964 if (stat(dev
, &stb
) != 0) {
965 fprintf(stderr
, Name
": Cannot find %s: %s\n", dev
,
969 devnum
= stat2devnum(&stb
);
972 struct mdstat_ent
*ms
= mdstat_read(1, 0);
973 struct mdstat_ent
*e
;
975 for (e
=ms
; e
; e
=e
->next
)
976 if (e
->devnum
== devnum
)
979 if (!e
|| e
->percent
< 0) {
980 if (e
&& e
->metadata_version
&&
981 strncmp(e
->metadata_version
, "external:", 9) == 0) {
982 if (is_subarray(&e
->metadata_version
[9]))
983 ping_monitor(&e
->metadata_version
[9]);
985 ping_monitor_by_id(devnum
);
998 static char *clean_states
[] = {
999 "clear", "inactive", "readonly", "read-auto", "clean", NULL
};
1001 int WaitClean(char *dev
, int sock
, int verbose
)
1008 fd
= open(dev
, O_RDONLY
);
1011 fprintf(stderr
, Name
": Couldn't open %s: %s\n", dev
, strerror(errno
));
1015 devnum
= fd2devnum(fd
);
1016 mdi
= sysfs_read(fd
, devnum
, GET_VERSION
|GET_LEVEL
|GET_SAFEMODE
);
1019 fprintf(stderr
, Name
": Failed to read sysfs attributes for "
1025 switch(mdi
->array
.level
) {
1027 case LEVEL_MULTIPATH
:
1029 /* safemode delay is irrelevant for these levels */
1033 /* for internal metadata the kernel handles the final clean
1034 * transition, containers can never be dirty
1036 if (!is_subarray(mdi
->text_version
))
1039 /* safemode disabled ? */
1040 if (mdi
->safe_mode_delay
== 0)
1044 int state_fd
= sysfs_open(fd2devnum(fd
), NULL
, "array_state");
1049 /* minimize the safe_mode_delay and prepare to wait up to 5s
1050 * for writes to quiesce
1052 sysfs_set_safemode(mdi
, 1);
1058 /* wait for array_state to be clean */
1060 rv
= read(state_fd
, buf
, sizeof(buf
));
1063 if (sysfs_match_word(buf
, clean_states
) <= 4)
1065 FD_SET(state_fd
, &fds
);
1066 rv
= select(state_fd
+ 1, NULL
, NULL
, &fds
, &tm
);
1067 if (rv
< 0 && errno
!= EINTR
)
1069 lseek(state_fd
, 0, SEEK_SET
);
1073 else if (fping_monitor(sock
) == 0 ||
1074 ping_monitor(mdi
->text_version
) == 0) {
1075 /* we need to ping to close the window between array
1076 * state transitioning to clean and the metadata being
1083 fprintf(stderr
, Name
": Error waiting for %s to be clean\n",
1086 /* restore the original safe_mode_delay */
1087 sysfs_set_safemode(mdi
, mdi
->safe_mode_delay
);
1096 #endif /* MDASSEMBLE */