]>
git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
36 char devname
[MD_NAME_MAX
+ sizeof("/dev/md/")]; /* length of "/dev/md/" + device name + terminating byte*/
37 char devnm
[MD_NAME_MAX
]; /* to sync with mdstat info */
41 int active
, working
, failed
, spare
, raid
;
45 int devstate
[MAX_DISKS
];
46 dev_t devid
[MAX_DISKS
];
48 char parent_devnm
[MD_NAME_MAX
]; /* For subarray, devnm of parent.
51 struct supertype
*metadata
;
52 struct state
*subarray
;/* for a container it is a link to first subarray
53 * for a subarray it is a link to next subarray
54 * in the same container */
55 struct state
*parent
; /* for a subarray it is a link to its container
66 static int make_daemon(char *pidfile
);
67 static int check_one_sharer(int scan
);
68 static void write_autorebuild_pid(void);
69 static void alert(const char *event
, const char *dev
, const char *disc
, struct alert_info
*info
);
70 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
,
71 int test
, struct alert_info
*info
,
72 int increments
, char *prefer
);
73 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
,
74 int test
, struct alert_info
*info
);
75 static void try_spare_migration(struct state
*statelist
, struct alert_info
*info
);
76 static void link_containers_with_subarrays(struct state
*list
);
77 static void free_statelist(struct state
*statelist
);
79 static int check_udev_activity(void);
82 int Monitor(struct mddev_dev
*devlist
,
83 char *mailaddr
, char *alert_cmd
,
85 int daemonise
, int oneshot
,
86 int dosyslog
, char *pidfile
, int increments
,
90 * Every few seconds, scan every md device looking for changes
91 * When a change is found, log it, possibly run the alert command,
92 * and possibly send Email
94 * For each array, we record:
96 * active/working/failed/spare drives
97 * State of each device.
98 * %rebuilt if rebuilding
100 * If the update time changes, check out all the data again
101 * It is possible that we cannot get the state of each device
102 * due to bugs in the md kernel module.
103 * We also read /proc/mdstat to get rebuild percent,
104 * and to get state on all active devices in case of kernel bug.
108 * An active device had Faulty set or Active/Sync removed
110 * A spare device had Faulty set
112 * An active device had a reverse transition
114 * percent went from -1 to +ve
116 * percent went from below to not-below NN%
118 * Couldn't access a device which was previously visible
120 * if we detect an array with active<raid and spare==0
121 * we look at other arrays that have same spare-group
122 * If we find one with active==raid and spare>0,
123 * and if we can get_disk_info and find a name
124 * Then we hot-remove and hot-add to the other array
126 * If devlist is NULL, then we can monitor everything if --scan
127 * was given. We get an initial list from config file and add anything
128 * that appears in /proc/mdstat
131 struct state
*statelist
= NULL
;
133 struct mdstat_ent
*mdstat
= NULL
;
135 struct alert_info info
;
136 struct mddev_ident
*mdlist
;
137 int delay_for_event
= c
->delay
;
139 if (devlist
&& c
->scan
) {
140 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
145 mailaddr
= conf_get_mailaddr();
148 alert_cmd
= conf_get_program();
150 mailfrom
= conf_get_mailfrom();
152 if (c
->scan
&& !mailaddr
&& !alert_cmd
&& !dosyslog
) {
153 pr_err("No mail address or alert command - not monitoring.\n");
158 pr_err("Monitor is started with delay %ds\n", c
->delay
);
160 pr_err("Monitor using email address %s\n", mailaddr
);
162 pr_err("Monitor using program %s\n", alert_cmd
);
165 info
.alert_cmd
= alert_cmd
;
166 info
.mailaddr
= mailaddr
;
167 info
.mailfrom
= mailfrom
;
168 info
.dosyslog
= dosyslog
;
171 if (check_one_sharer(c
->scan
))
176 int rv
= make_daemon(pidfile
);
182 write_autorebuild_pid();
184 if (devlist
== NULL
) {
185 mdlist
= conf_get_ident(NULL
);
186 for (; mdlist
; mdlist
= mdlist
->next
) {
189 if (mdlist
->devname
== NULL
)
191 if (strcasecmp(mdlist
->devname
, "<ignore>") == 0)
193 if (!is_mddev(mdlist
->devname
))
196 st
= xcalloc(1, sizeof *st
);
197 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof("/dev/md/"),
198 "/dev/md/%s", basename(mdlist
->devname
));
199 st
->next
= statelist
;
201 st
->percent
= RESYNC_UNKNOWN
;
203 st
->expected_spares
= mdlist
->spare_disks
;
204 if (mdlist
->spare_group
)
205 st
->spare_group
= xstrdup(mdlist
->spare_group
);
209 struct mddev_dev
*dv
;
211 for (dv
= devlist
; dv
; dv
= dv
->next
) {
214 if (!is_mddev(dv
->devname
))
217 st
= xcalloc(1, sizeof *st
);
218 mdlist
= conf_get_ident(dv
->devname
);
219 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof("/dev/md/"), "%s", dv
->devname
);
220 st
->next
= statelist
;
222 st
->percent
= RESYNC_UNKNOWN
;
223 st
->expected_spares
= -1;
225 st
->expected_spares
= mdlist
->spare_disks
;
226 if (mdlist
->spare_group
)
227 st
->spare_group
= xstrdup(mdlist
->spare_group
);
235 struct state
*st
, **stp
;
237 int anyredundant
= 0;
241 mdstat
= mdstat_read(oneshot
? 0 : 1, 0);
243 for (st
= statelist
; st
; st
= st
->next
) {
244 if (check_array(st
, mdstat
, c
->test
, &info
,
245 increments
, c
->prefer
))
247 /* for external arrays, metadata is filled for
250 if (st
->metadata
&& st
->metadata
->ss
->external
)
252 if (st
->err
== 0 && !anyredundant
)
256 /* now check if there are any new devices found in mdstat */
258 new_found
= add_new_arrays(mdstat
, &statelist
, c
->test
,
261 /* If an array has active < raid && spare == 0 && spare_group != NULL
262 * Look for another array with spare > 0 and active == raid and same spare_group
263 * if found, choose a device and hotremove/hotadd
265 if (share
&& anydegraded
)
266 try_spare_migration(statelist
, &info
);
270 else if (!anyredundant
) {
271 pr_err("No array with redundancy detected, stopping\n");
277 * Wait for udevd to finish new devices
280 if (mdstat_wait(delay_for_event
) &&
281 check_udev_activity())
282 pr_err("Error while waiting for UDEV to complete new devices processing\n");
284 int wait_result
= mdstat_wait(delay_for_event
);
286 * Give chance to process new device
288 if (wait_result
!= 0) {
292 delay_for_event
= c
->delay
;
299 for (stp
= &statelist
; (st
= *stp
) != NULL
; ) {
300 if (st
->from_auto
&& st
->err
> 5) {
303 free(st
->spare_group
);
311 free_statelist(statelist
);
318 static int make_daemon(char *pidfile
)
321 * -1 in the forked daemon
324 * so a non-negative becomes the exit code.
331 FILE *pid_file
= NULL
;
332 int fd
= open(pidfile
, O_WRONLY
| O_CREAT
| O_TRUNC
,
335 pid_file
= fdopen(fd
, "w");
337 perror("cannot create pid file");
339 fprintf(pid_file
,"%d\n", pid
);
354 static int check_one_sharer(int scan
)
359 char comm_path
[PATH_MAX
];
363 sprintf(path
, "%s/autorebuild.pid", MDMON_DIR
);
364 fp
= fopen(path
, "r");
366 if (fscanf(fp
, "%d", &pid
) != 1)
368 snprintf(comm_path
, sizeof(comm_path
),
369 "/proc/%d/comm", pid
);
370 comm_fp
= fopen(comm_path
, "r");
372 if (fscanf(comm_fp
, "%19s", comm
) &&
373 strncmp(basename(comm
), Name
, strlen(Name
)) == 0) {
375 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
380 pr_err("Warning: One autorebuild process already running.\n");
390 static void write_autorebuild_pid()
395 sprintf(path
, "%s/autorebuild.pid", MDMON_DIR
);
397 if (mkdir(MDMON_DIR
, 0700) < 0 && errno
!= EEXIST
) {
398 pr_err("Can't create autorebuild.pid file\n");
400 int fd
= open(path
, O_WRONLY
| O_CREAT
| O_TRUNC
, 0700);
403 fp
= fdopen(fd
, "w");
406 pr_err("Can't create autorebuild.pid file\n");
409 fprintf(fp
, "%d\n", pid
);
415 static void execute_alert_cmd(const char *event
, const char *dev
, const char *disc
, struct alert_info
*info
)
421 waitpid(pid
, NULL
, 0);
424 pr_err("Cannot fork to execute alert command");
427 execl(info
->alert_cmd
, info
->alert_cmd
, event
, dev
, disc
, NULL
);
432 static void send_event_email(const char *event
, const char *dev
, const char *disc
, struct alert_info
*info
)
439 mp
= popen(Sendmail
, "w");
441 pr_err("Cannot open pipe stream for sendmail.\n");
445 gethostname(hname
, sizeof(hname
));
446 signal(SIGPIPE
, SIG_IGN
);
448 fprintf(mp
, "From: %s\n", info
->mailfrom
);
450 fprintf(mp
, "From: %s monitoring <root>\n", Name
);
451 fprintf(mp
, "To: %s\n", info
->mailaddr
);
452 fprintf(mp
, "Subject: %s event on %s:%s\n\n", event
, dev
, hname
);
453 fprintf(mp
, "This is an automatically generated mail message. \n");
454 fprintf(mp
, "A %s event had been detected on md device %s.\n\n", event
, dev
);
456 if (disc
&& disc
[0] != ' ')
458 "It could be related to component device %s.\n\n", disc
);
459 if (disc
&& disc
[0] == ' ')
460 fprintf(mp
, "Extra information:%s.\n\n", disc
);
462 mdstat
= fopen("/proc/mdstat", "r");
464 pr_err("Cannot open /proc/mdstat\n");
469 fprintf(mp
, "The /proc/mdstat file currently contains the following:\n\n");
470 while ((n
= fread(buf
, 1, sizeof(buf
), mdstat
)) > 0)
471 n
= fwrite(buf
, 1, n
, mp
);
/*
 * event_syslog_priority() - choose the syslog severity for a monitor event.
 * @event: event name, e.g. "Fail", "RebuildStarted", "SparesMissing".
 *
 * Matching is by prefix, so "FailSpare" is classed with "Fail" and every
 * "Rebuild*" event is classed together.
 *
 * Return: LOG_CRIT, LOG_WARNING or LOG_INFO.
 */
static int event_syslog_priority(const char *event)
{
	/* These are the critical events: */
	if (strncmp(event, "Fail", 4) == 0 ||
	    strncmp(event, "Degrade", 7) == 0 ||
	    strncmp(event, "DeviceDisappeared", 17) == 0)
		return LOG_CRIT;

	/*
	 * Good to know about, but are not failures.
	 * The "Spares" test must be "== 0": with "!= 0" every event that
	 * does not start with "Spares" would be logged as a warning and
	 * only "Spares*" would fall through to the lowest severity.
	 */
	if (strncmp(event, "Rebuild", 7) == 0 ||
	    strncmp(event, "MoveSpare", 9) == 0 ||
	    strncmp(event, "Spares", 6) == 0)
		return LOG_WARNING;

	/* Everything else: */
	return LOG_INFO;
}

/*
 * log_event_to_syslog() - report a monitor event through syslog.
 * @event: event name; also selects the severity.
 * @dev: md device the event was detected on.
 * @disc: optional detail, may be NULL. When it does not start with a
 *	  space it is reported as the component device; when it does start
 *	  with a space it is appended verbatim after the md device name.
 */
static void log_event_to_syslog(const char *event, const char *dev, const char *disc)
{
	int priority = event_syslog_priority(event);

	if (disc && disc[0] != ' ')
		syslog(priority,
		       "%s event detected on md device %s, component device %s", event, dev, disc);
	else if (disc)
		syslog(priority, "%s event detected on md device %s: %s", event, dev, disc);
	else
		syslog(priority, "%s event detected on md device %s", event, dev);
}
504 static void alert(const char *event
, const char *dev
, const char *disc
, struct alert_info
*info
)
506 if (!info
->alert_cmd
&& !info
->mailaddr
&& !info
->dosyslog
) {
507 time_t now
= time(0);
509 printf("%1.15s: %s on %s %s\n", ctime(&now
) + 4,
510 event
, dev
, disc
?disc
:"unknown device");
513 execute_alert_cmd(event
, dev
, disc
, info
);
515 if (info
->mailaddr
&& (strncmp(event
, "Fail", 4) == 0 ||
516 strncmp(event
, "Test", 4) == 0 ||
517 strncmp(event
, "Spares", 6) == 0 ||
518 strncmp(event
, "Degrade", 7) == 0)) {
519 send_event_email(event
, dev
, disc
, info
);
523 log_event_to_syslog(event
, dev
, disc
);
526 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
,
527 int test
, struct alert_info
*ainfo
,
528 int increments
, char *prefer
)
530 /* Update the state 'st' to reflect any changes shown in mdstat,
531 * or found by directly examining the array, and return
532 * '1' if the array is degraded, or '0' if it is optimal (or dead).
534 struct { int state
, major
, minor
; } info
[MAX_DISKS
];
535 struct mdinfo
*sra
= NULL
;
536 mdu_array_info_t array
;
537 struct mdstat_ent
*mse
= NULL
, *mse2
;
538 char *dev
= st
->devname
;
545 int is_container
= 0;
546 unsigned long redundancy_only_flags
= 0;
549 alert("TestMessage", dev
, NULL
, ainfo
);
553 fd
= open(dev
, O_RDONLY
);
557 if (st
->devnm
[0] == 0)
558 snprintf(st
->devnm
, MD_NAME_MAX
, "%s", fd2devnm(fd
));
560 for (mse2
= mdstat
; mse2
; mse2
= mse2
->next
)
561 if (strcmp(mse2
->devnm
, st
->devnm
) == 0) {
562 mse2
->devnm
[0] = 0; /* flag it as "used" */
567 /* duplicated array in statelist
568 * or re-created after reading mdstat
574 if (mse
->level
== NULL
)
577 if (!is_container
&& !md_array_active(fd
))
580 fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
581 if (md_get_array_info(fd
, &array
) < 0)
584 if (!is_container
&& map_name(pers
, mse
->level
) > 0)
585 redundancy_only_flags
|= GET_MISMATCH
;
587 sra
= sysfs_read(-1, st
->devnm
, GET_LEVEL
| GET_DISKS
| GET_DEVS
|
588 GET_STATE
| redundancy_only_flags
);
593 /* It's much easier to list what array levels can't
594 * have a device disappear than all of them that can
596 if (sra
->array
.level
== 0 || sra
->array
.level
== -1) {
597 if (!st
->err
&& !st
->from_config
)
598 alert("DeviceDisappeared", dev
, " Wrong-Level", ainfo
);
603 /* this array is in /proc/mdstat */
604 if (array
.utime
== 0)
605 /* external arrays don't update utime, so
606 * just make sure it is always different. */
607 array
.utime
= st
->utime
+ 1;;
610 /* New array appeared where previously had an error */
612 st
->percent
= RESYNC_NONE
;
615 alert("NewArray", st
->devname
, NULL
, ainfo
);
618 if (st
->utime
== array
.utime
&& st
->failed
== sra
->array
.failed_disks
&&
619 st
->working
== sra
->array
.working_disks
&&
620 st
->spare
== sra
->array
.spare_disks
&&
621 (mse
== NULL
|| (mse
->percent
== st
->percent
))) {
622 if ((st
->active
< st
->raid
) && st
->spare
== 0)
626 if (st
->utime
== 0 && /* new array */
627 mse
->pattern
&& strchr(mse
->pattern
, '_') /* degraded */)
628 alert("DegradedArray", dev
, NULL
, ainfo
);
630 if (st
->utime
== 0 && /* new array */ st
->expected_spares
> 0 &&
631 sra
->array
.spare_disks
< st
->expected_spares
)
632 alert("SparesMissing", dev
, NULL
, ainfo
);
633 if (st
->percent
< 0 && st
->percent
!= RESYNC_UNKNOWN
&&
635 alert("RebuildStarted", dev
, NULL
, ainfo
);
636 if (st
->percent
>= 0 && mse
->percent
>= 0 &&
637 (mse
->percent
/ increments
) > (st
->percent
/ increments
)) {
638 char percentalert
[18];
640 * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
643 if((mse
->percent
/ increments
) == 0)
644 snprintf(percentalert
, sizeof(percentalert
),
647 snprintf(percentalert
, sizeof(percentalert
),
648 "Rebuild%02d", mse
->percent
);
650 alert(percentalert
, dev
, NULL
, ainfo
);
653 if (mse
->percent
== RESYNC_NONE
&& st
->percent
>= 0) {
654 /* Rebuild/sync/whatever just finished.
655 * If there is a number in /mismatch_cnt,
656 * we should report that.
658 if (sra
&& sra
->mismatch_cnt
> 0) {
660 snprintf(cnt
, sizeof(cnt
),
661 " mismatches found: %d (on raid level %d)",
662 sra
->mismatch_cnt
, sra
->array
.level
);
663 alert("RebuildFinished", dev
, cnt
, ainfo
);
665 alert("RebuildFinished", dev
, NULL
, ainfo
);
667 st
->percent
= mse
->percent
;
669 remaining_disks
= sra
->array
.nr_disks
;
670 for (i
= 0; i
< MAX_DISKS
&& remaining_disks
> 0; i
++) {
671 mdu_disk_info_t disc
;
673 if (md_get_disk_info(fd
, &disc
) >= 0) {
674 info
[i
].state
= disc
.state
;
675 info
[i
].major
= disc
.major
;
676 info
[i
].minor
= disc
.minor
;
677 if (disc
.major
|| disc
.minor
)
680 info
[i
].major
= info
[i
].minor
= 0;
684 if (mse
->metadata_version
&&
685 strncmp(mse
->metadata_version
, "external:", 9) == 0 &&
686 is_subarray(mse
->metadata_version
+9)) {
688 snprintf(st
->parent_devnm
, MD_NAME_MAX
, "%s", mse
->metadata_version
+ 10);
689 sl
= strchr(st
->parent_devnm
, '/');
693 st
->parent_devnm
[0] = 0;
694 if (st
->metadata
== NULL
&& st
->parent_devnm
[0] == 0)
695 st
->metadata
= super_by_fd(fd
, NULL
);
697 for (i
= 0; i
< MAX_DISKS
; i
++) {
698 mdu_disk_info_t disc
= {0, 0, 0, 0, 0};
703 if (i
< last_disk
&& (info
[i
].major
|| info
[i
].minor
)) {
704 newstate
= info
[i
].state
;
705 dv
= map_dev_preferred(info
[i
].major
, info
[i
].minor
, 1,
707 disc
.state
= newstate
;
708 disc
.major
= info
[i
].major
;
709 disc
.minor
= info
[i
].minor
;
711 newstate
= (1 << MD_DISK_REMOVED
);
713 if (dv
== NULL
&& st
->devid
[i
])
714 dv
= map_dev_preferred(major(st
->devid
[i
]),
715 minor(st
->devid
[i
]), 1, prefer
);
716 change
= newstate
^ st
->devstate
[i
];
717 if (st
->utime
&& change
&& !st
->err
&& !new_array
) {
718 if ((st
->devstate
[i
]&change
) & (1 << MD_DISK_SYNC
))
719 alert("Fail", dev
, dv
, ainfo
);
720 else if ((newstate
& (1 << MD_DISK_FAULTY
)) &&
721 (disc
.major
|| disc
.minor
) &&
722 st
->devid
[i
] == makedev(disc
.major
,
724 alert("FailSpare", dev
, dv
, ainfo
);
725 else if ((newstate
&change
) & (1 << MD_DISK_SYNC
))
726 alert("SpareActive", dev
, dv
, ainfo
);
728 st
->devstate
[i
] = newstate
;
729 st
->devid
[i
] = makedev(disc
.major
, disc
.minor
);
731 st
->active
= sra
->array
.active_disks
;
732 st
->working
= sra
->array
.working_disks
;
733 st
->spare
= sra
->array
.spare_disks
;
734 st
->failed
= sra
->array
.failed_disks
;
735 st
->utime
= array
.utime
;
736 st
->raid
= sra
->array
.raid_disks
;
738 if ((st
->active
< st
->raid
) && st
->spare
== 0)
749 if (!st
->err
&& !is_container
)
750 alert("DeviceDisappeared", dev
, NULL
, ainfo
);
755 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
,
756 int test
, struct alert_info
*info
)
758 struct mdstat_ent
*mse
;
762 for (mse
= mdstat
; mse
; mse
= mse
->next
)
763 if (mse
->devnm
[0] && (!mse
->level
|| /* retrieve containers */
764 (strcmp(mse
->level
, "raid0") != 0 &&
765 strcmp(mse
->level
, "linear") != 0))) {
766 struct state
*st
= xcalloc(1, sizeof *st
);
767 mdu_array_info_t array
;
770 name
= get_md_name(mse
->devnm
);
776 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof("/dev/md/"), "%s", name
);
777 if ((fd
= open(st
->devname
, O_RDONLY
)) < 0 ||
778 md_get_array_info(fd
, &array
) < 0) {
782 put_md_name(st
->devname
);
784 st
->metadata
->ss
->free_super(st
->metadata
);
791 st
->next
= *statelist
;
794 snprintf(st
->devnm
, MD_NAME_MAX
, "%s", mse
->devnm
);
795 st
->percent
= RESYNC_UNKNOWN
;
796 st
->expected_spares
= -1;
797 if (mse
->metadata_version
&&
798 strncmp(mse
->metadata_version
,
799 "external:", 9) == 0 &&
800 is_subarray(mse
->metadata_version
+9)) {
802 snprintf(st
->parent_devnm
, MD_NAME_MAX
,
803 "%s", mse
->metadata_version
+ 10);
804 sl
= strchr(st
->parent_devnm
, '/');
807 st
->parent_devnm
[0] = 0;
810 alert("TestMessage", st
->devname
, NULL
, info
);
816 static int get_required_spare_criteria(struct state
*st
,
817 struct spare_criteria
*sc
)
821 if (!st
->metadata
|| !st
->metadata
->ss
->get_spare_criteria
) {
827 fd
= open(st
->devname
, O_RDONLY
);
830 if (st
->metadata
->ss
->external
)
831 st
->metadata
->ss
->load_container(st
->metadata
, fd
, st
->devname
);
833 st
->metadata
->ss
->load_super(st
->metadata
, fd
, st
->devname
);
835 if (!st
->metadata
->sb
)
838 st
->metadata
->ss
->get_spare_criteria(st
->metadata
, sc
);
839 st
->metadata
->ss
->free_super(st
->metadata
);
844 static int check_donor(struct state
*from
, struct state
*to
)
851 /* Cannot move from a member */
855 for (sub
= from
->subarray
; sub
; sub
= sub
->subarray
)
856 /* If source array has degraded subarrays, don't
859 if (sub
->active
< sub
->raid
)
861 if (from
->metadata
->ss
->external
== 0)
862 if (from
->active
< from
->raid
)
864 if (from
->spare
<= 0)
869 static dev_t
choose_spare(struct state
*from
, struct state
*to
,
870 struct domainlist
*domlist
, struct spare_criteria
*sc
)
875 for (d
= from
->raid
; !dev
&& d
< MAX_DISKS
; d
++) {
876 if (from
->devid
[d
] > 0 && from
->devstate
[d
] == 0) {
877 struct dev_policy
*pol
;
878 unsigned long long dev_size
;
879 unsigned int dev_sector_size
;
881 if (to
->metadata
->ss
->external
&&
882 test_partition_from_id(from
->devid
[d
]))
886 dev_size_from_id(from
->devid
[d
], &dev_size
) &&
887 dev_size
< sc
->min_size
)
890 if (sc
->sector_size
&&
891 dev_sector_size_from_id(from
->devid
[d
],
893 sc
->sector_size
!= dev_sector_size
)
896 pol
= devid_policy(from
->devid
[d
]);
897 if (from
->spare_group
)
898 pol_add(&pol
, pol_domain
,
899 from
->spare_group
, NULL
);
900 if (domain_test(domlist
, pol
,
901 to
->metadata
->ss
->name
) == 1)
902 dev
= from
->devid
[d
];
903 dev_policy_free(pol
);
909 static dev_t
container_choose_spare(struct state
*from
, struct state
*to
,
910 struct domainlist
*domlist
,
911 struct spare_criteria
*sc
, int active
)
913 /* This is similar to choose_spare, but we cannot trust devstate,
914 * so we need to read the metadata instead
917 struct supertype
*st
= from
->metadata
;
918 int fd
= open(from
->devname
, O_RDONLY
);
924 if (!st
->ss
->getinfo_super_disks
) {
929 err
= st
->ss
->load_container(st
, fd
, NULL
);
935 /* We must check if number of active disks has not increased
936 * since ioctl in main loop. mdmon may have added spare
937 * to subarray. If so we do not need to look for more spares
938 * so return non zero value */
941 list
= st
->ss
->getinfo_super_disks(st
);
943 st
->ss
->free_super(st
);
948 if (dp
->disk
.state
& (1 << MD_DISK_SYNC
) &&
949 !(dp
->disk
.state
& (1 << MD_DISK_FAULTY
)))
954 if (active
< active_cnt
) {
955 /* Spare just activated.*/
956 st
->ss
->free_super(st
);
961 /* We only need one spare so full list not needed */
962 list
= container_choose_spares(st
, sc
, domlist
, from
->spare_group
,
963 to
->metadata
->ss
->name
, 1);
965 struct mdinfo
*disks
= list
->devs
;
967 dev
= makedev(disks
->disk
.major
, disks
->disk
.minor
);
970 st
->ss
->free_super(st
);
974 static void try_spare_migration(struct state
*statelist
, struct alert_info
*info
)
978 struct spare_criteria sc
;
980 link_containers_with_subarrays(statelist
);
981 for (st
= statelist
; st
; st
= st
->next
)
982 if (st
->active
< st
->raid
&& st
->spare
== 0 && !st
->err
) {
983 struct domainlist
*domlist
= NULL
;
985 struct state
*to
= st
;
987 if (to
->parent_devnm
[0] && !to
->parent
)
988 /* subarray monitored without parent container
989 * we can't move spares here */
993 /* member of a container */
996 if (get_required_spare_criteria(to
, &sc
))
998 if (to
->metadata
->ss
->external
) {
999 /* We must make sure there is
1000 * no suitable spare in container already.
1001 * If there is we don't add more */
1002 dev_t devid
= container_choose_spare(
1003 to
, to
, NULL
, &sc
, st
->active
);
1007 for (d
= 0; d
< MAX_DISKS
; d
++)
1009 domainlist_add_dev(&domlist
,
1011 to
->metadata
->ss
->name
);
1012 if (to
->spare_group
)
1013 domain_add(&domlist
, to
->spare_group
);
1015 * No spare migration if the destination
1016 * has no domain. Skip this array.
1020 for (from
=statelist
; from
; from
=from
->next
) {
1022 if (!check_donor(from
, to
))
1024 if (from
->metadata
->ss
->external
)
1025 devid
= container_choose_spare(
1026 from
, to
, domlist
, &sc
, 0);
1028 devid
= choose_spare(from
, to
, domlist
,
1031 move_spare(from
->devname
, to
->devname
,
1033 alert("MoveSpare", to
->devname
,
1034 from
->devname
, info
);
1038 domain_free(domlist
);
1042 /* search the statelist to connect external
1043 * metadata subarrays with their containers
1044 * We always completely rebuild the tree from scratch as
1045 * that is safest considering the possibility of entries
1046 * disappearing or changing.
1048 static void link_containers_with_subarrays(struct state
*list
)
1052 for (st
= list
; st
; st
= st
->next
) {
1054 st
->subarray
= NULL
;
1056 for (st
= list
; st
; st
= st
->next
)
1057 if (st
->parent_devnm
[0])
1058 for (cont
= list
; cont
; cont
= cont
->next
)
1059 if (!cont
->err
&& cont
->parent_devnm
[0] == 0 &&
1060 strcmp(cont
->devnm
, st
->parent_devnm
) == 0) {
1062 st
->subarray
= cont
->subarray
;
1063 cont
->subarray
= st
;
1069 * free_statelist() - Frees statelist.
1070 * @statelist: statelist to free
1072 static void free_statelist(struct state
*statelist
)
1074 struct state
*tmp
= NULL
;
1077 if (statelist
->spare_group
)
1078 free(statelist
->spare_group
);
1081 statelist
= statelist
->next
;
1087 /* function: check_udev_activity
1088 * Description: Function waits for udev to finish
1089 * events processing.
1091 * 1 - detected error while opening udev
1093 * 0 - successful completion
1095 static int check_udev_activity(void)
1097 struct udev
*udev
= NULL
;
1098 struct udev_queue
*udev_queue
= NULL
;
1099 int timeout_cnt
= 30;
1103 * In rare cases systemd may not have udev,
1104 * in such cases just exit with rc 0
1115 udev_queue
= udev_queue_new(udev
);
1121 if (udev_queue_get_queue_is_empty(udev_queue
))
1124 while (!udev_queue_get_queue_is_empty(udev_queue
)) {
1137 udev_queue_unref(udev_queue
);
1144 /* Not really Monitor but ... */
1151 int frozen_remaining
= 3;
1153 if (!stat_is_blkdev(dev
, &rdev
))
1156 tmp
= devid2devnm(rdev
);
1158 pr_err("Cannot get md device name.\n");
1165 struct mdstat_ent
*ms
= mdstat_read(1, 0);
1166 struct mdstat_ent
*e
;
1168 for (e
= ms
; e
; e
= e
->next
)
1169 if (strcmp(e
->devnm
, devnm
) == 0)
1172 if (e
&& e
->percent
== RESYNC_NONE
) {
1173 /* We could be in the brief pause before something
1174 * starts. /proc/mdstat doesn't show that, but
1180 if (sysfs_init(&mdi
, -1, devnm
))
1182 if (sysfs_get_str(&mdi
, NULL
, "sync_action",
1184 strcmp(buf
,"idle\n") != 0) {
1185 e
->percent
= RESYNC_UNKNOWN
;
1186 if (strcmp(buf
, "frozen\n") == 0) {
1187 if (frozen_remaining
== 0)
1188 e
->percent
= RESYNC_NONE
;
1190 frozen_remaining
-= 1;
1194 if (!e
|| e
->percent
== RESYNC_NONE
) {
1195 if (e
&& e
->metadata_version
&&
1196 strncmp(e
->metadata_version
, "external:", 9) == 0) {
1197 if (is_subarray(&e
->metadata_version
[9]))
1198 ping_monitor(&e
->metadata_version
[9]);
1200 ping_monitor(devnm
);
/*
 * array_state words that count as "clean" for WaitClean().
 *
 * The state "broken" is used only for RAID0/LINEAR - it's the same as
 * "clean", but used in case the array has one or more members missing.
 *
 * The list is NULL-terminated, and WaitClean() compares the result of
 * sysfs_match_word() against ARRAY_SIZE(clean_states) - 1, so the NULL
 * entry must remain the last element.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
1217 int WaitClean(char *dev
, int verbose
)
1224 if (!stat_is_blkdev(dev
, NULL
))
1226 fd
= open(dev
, O_RDONLY
);
1229 pr_err("Couldn't open %s: %s\n", dev
, strerror(errno
));
1233 strcpy(devnm
, fd2devnm(fd
));
1234 mdi
= sysfs_read(fd
, devnm
, GET_VERSION
|GET_LEVEL
|GET_SAFEMODE
);
1237 pr_err("Failed to read sysfs attributes for %s\n", dev
);
1242 switch(mdi
->array
.level
) {
1244 case LEVEL_MULTIPATH
:
1246 /* safemode delay is irrelevant for these levels */
1250 /* for internal metadata the kernel handles the final clean
1251 * transition, containers can never be dirty
1253 if (!is_subarray(mdi
->text_version
))
1256 /* safemode disabled ? */
1257 if (mdi
->safe_mode_delay
== 0)
1261 int state_fd
= sysfs_open(fd2devnm(fd
), NULL
, "array_state");
1265 /* minimize the safe_mode_delay and prepare to wait up to 5s
1266 * for writes to quiesce
1268 sysfs_set_safemode(mdi
, 1);
1270 /* wait for array_state to be clean */
1272 rv
= read(state_fd
, buf
, sizeof(buf
));
1275 if (sysfs_match_word(buf
, clean_states
) <
1276 (int)ARRAY_SIZE(clean_states
) - 1)
1278 rv
= sysfs_wait(state_fd
, &delay
);
1279 if (rv
< 0 && errno
!= EINTR
)
1281 lseek(state_fd
, 0, SEEK_SET
);
1285 else if (ping_monitor(mdi
->text_version
) == 0) {
1286 /* we need to ping to close the window between array
1287 * state transitioning to clean and the metadata being
1293 pr_err("Error connecting monitor with %s\n", dev
);
1296 pr_err("Error waiting for %s to be clean\n", dev
);
1298 /* restore the original safe_mode_delay */
1299 sysfs_set_safemode(mdi
, mdi
->safe_mode_delay
);