2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
33 #define TASK_COMM_LEN 16
34 #define EVENT_NAME_MAX 32
35 #define AUTOREBUILD_PID_PATH MDMON_DIR "/autorebuild.pid"
36 #define FALLBACK_DELAY 5
39 * struct state - external array or container properties.
40 * @devname: has length of %DEV_MD_DIR + device name + terminating byte
41 * @devnm: to sync with mdstat info
41 * @parent_devnm: for a subarray, devnm of parent; for others, ""
43 * @subarray: for a container it is a link to first subarray, for a subarray it is a link to next
44 * subarray in the same container
45 * @parent: for a subarray it is a link to its container
48 char devname
[MD_NAME_MAX
+ sizeof(DEV_MD_DIR
)];
49 char devnm
[MD_NAME_MAX
];
53 int active
, working
, failed
, spare
, raid
;
57 int devstate
[MAX_DISKS
];
58 dev_t devid
[MAX_DISKS
];
60 char parent_devnm
[MD_NAME_MAX
];
61 struct supertype
*metadata
;
62 struct state
*subarray
;
68 char hostname
[HOST_NAME_MAX
];
77 EVENT_SPARE_ACTIVE
= 0,
81 __SYSLOG_PRIORITY_WARNING
,
82 EVENT_REBUILD_STARTED
,
84 EVENT_REBUILD_FINISHED
,
86 __SYSLOG_PRIORITY_CRITICAL
,
87 EVENT_DEVICE_DISAPPEARED
,
94 mapping_t events_map
[] = {
95 {"SpareActive", EVENT_SPARE_ACTIVE
},
96 {"NewArray", EVENT_NEW_ARRAY
},
97 {"MoveSpare", EVENT_MOVE_SPARE
},
98 {"TestMessage", EVENT_TEST_MESSAGE
},
99 {"RebuildStarted", EVENT_REBUILD_STARTED
},
100 {"Rebuild", EVENT_REBUILD
},
101 {"RebuildFinished", EVENT_REBUILD_FINISHED
},
102 {"SparesMissing", EVENT_SPARES_MISSING
},
103 {"DeviceDisappeared", EVENT_DEVICE_DISAPPEARED
},
104 {"Fail", EVENT_FAIL
},
105 {"FailSpare", EVENT_FAIL_SPARE
},
106 {"DegradedArray", EVENT_DEGRADED_ARRAY
},
107 {NULL
, EVENT_UNKNOWN
}
111 enum event event_enum
;
113 * @event_name: Rebuild event name must be in form "RebuildXX", where XX is rebuild progress.
115 char event_name
[EVENT_NAME_MAX
];
116 char message
[BUFSIZ
];
117 const char *description
;
122 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
);
123 static void try_spare_migration(struct state
*statelist
);
124 static void link_containers_with_subarrays(struct state
*list
);
125 static void free_statelist(struct state
*statelist
);
126 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
, int increments
, char *prefer
);
127 static int check_one_sharer(int scan
);
128 static void link_containers_with_subarrays(struct state
*list
);
129 static int make_daemon(char *pidfile
);
130 static void try_spare_migration(struct state
*statelist
);
131 static void wait_for_events(int *delay_for_event
, int c_delay
);
132 static void wait_for_events_mdstat(int *delay_for_event
, int c_delay
);
133 static int write_autorebuild_pid(void);
135 int Monitor(struct mddev_dev
*devlist
,
136 char *mailaddr
, char *alert_cmd
,
138 int daemonise
, int oneshot
,
139 int dosyslog
, char *pidfile
, int increments
,
143 * Every few seconds, scan every md device looking for changes
144 * When a change is found, log it, possibly run the alert command,
145 * and possibly send Email
147 * For each array, we record:
149 * active/working/failed/spare drives
150 * State of each device.
151 * %rebuilt if rebuilding
153 * If the update time changes, check out all the data again
154 * It is possible that we cannot get the state of each device
155 * due to bugs in the md kernel module.
156 * We also read /proc/mdstat to get rebuild percent,
157 * and to get state on all active devices in case of kernel bug.
161 * An active device had Faulty set or Active/Sync removed
163 * A spare device had Faulty set
165 * An active device had a reverse transition
167 * percent went from -1 to +ve
169 * percent went from below to not-below NN%
171 * Couldn't access a device which was previously visible
173 * if we detect an array with active<raid and spare==0
174 * we look at other arrays that have same spare-group
175 * If we find one with active==raid and spare>0,
176 * and if we can get_disk_info and find a name
177 * Then we hot-remove and hot-add to the other array
179 * If devlist is NULL, then we can monitor everything if --scan
180 * was given. We get an initial list from config file and add anything
181 * that appears in /proc/mdstat
184 struct state
*statelist
= NULL
;
186 struct mdstat_ent
*mdstat
= NULL
;
188 struct mddev_ident
*mdlist
;
189 int delay_for_event
= c
->delay
;
191 if (devlist
&& c
->scan
) {
192 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
197 mailaddr
= conf_get_mailaddr();
200 alert_cmd
= conf_get_program();
202 mailfrom
= conf_get_mailfrom();
204 if (c
->scan
&& !mailaddr
&& !alert_cmd
&& !dosyslog
) {
205 pr_err("No mail address or alert command - not monitoring.\n");
210 pr_err("Monitor is started with delay %ds\n", c
->delay
);
212 pr_err("Monitor using email address %s\n", mailaddr
);
214 pr_err("Monitor using program %s\n", alert_cmd
);
217 info
.alert_cmd
= alert_cmd
;
218 info
.mailaddr
= mailaddr
;
219 info
.mailfrom
= mailfrom
;
220 info
.dosyslog
= dosyslog
;
223 if (s_gethostname(info
.hostname
, sizeof(info
.hostname
)) != 0) {
224 pr_err("Cannot get hostname.\n");
229 if (check_one_sharer(c
->scan
) == 2)
234 int rv
= make_daemon(pidfile
);
240 if (write_autorebuild_pid() != 0)
243 if (devlist
== NULL
) {
244 mdlist
= conf_get_ident(NULL
);
245 for (; mdlist
; mdlist
= mdlist
->next
) {
248 if (mdlist
->devname
== NULL
)
250 if (is_devname_ignore(mdlist
->devname
) == true)
252 if (!is_mddev(mdlist
->devname
))
255 st
= xcalloc(1, sizeof *st
);
256 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof(DEV_MD_DIR
), DEV_MD_DIR
"%s",
257 basename(mdlist
->devname
));
258 st
->next
= statelist
;
260 st
->percent
= RESYNC_UNKNOWN
;
262 st
->expected_spares
= mdlist
->spare_disks
;
263 if (mdlist
->spare_group
)
264 st
->spare_group
= xstrdup(mdlist
->spare_group
);
268 struct mddev_dev
*dv
;
270 for (dv
= devlist
; dv
; dv
= dv
->next
) {
273 if (!is_mddev(dv
->devname
))
276 st
= xcalloc(1, sizeof *st
);
277 mdlist
= conf_get_ident(dv
->devname
);
278 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof(DEV_MD_DIR
), "%s", dv
->devname
);
279 st
->next
= statelist
;
281 st
->percent
= RESYNC_UNKNOWN
;
282 st
->expected_spares
= -1;
284 st
->expected_spares
= mdlist
->spare_disks
;
285 if (mdlist
->spare_group
)
286 st
->spare_group
= xstrdup(mdlist
->spare_group
);
294 struct state
*st
, **stp
;
296 int anyredundant
= 0;
300 mdstat
= mdstat_read(oneshot
? 0 : 1, 0);
302 for (st
= statelist
; st
; st
= st
->next
) {
303 if (check_array(st
, mdstat
, increments
, c
->prefer
))
305 /* for external arrays, metadata is filled for
308 if (st
->metadata
&& st
->metadata
->ss
->external
)
310 if (st
->err
== 0 && !anyredundant
)
314 /* now check if there are any new devices found in mdstat */
316 new_found
= add_new_arrays(mdstat
, &statelist
);
318 /* If an array has active < raid && spare == 0 && spare_group != NULL
319 * Look for another array with spare > 0 and active == raid and same spare_group
320 * if found, choose a device and hotremove/hotadd
322 if (share
&& anydegraded
)
323 try_spare_migration(statelist
);
328 pr_err("No array with redundancy detected, stopping\n");
332 wait_for_events(&delay_for_event
, c
->delay
);
336 for (stp
= &statelist
; (st
= *stp
) != NULL
; ) {
337 if (st
->from_auto
&& st
->err
> 5) {
340 free(st
->spare_group
);
348 free_statelist(statelist
);
356 * wait_for_events() - Waits for events on md devices.
357 * @delay_for_event: pointer to current event delay
358 * @c_delay: delay from config
360 static void wait_for_events(int *delay_for_event
, int c_delay
)
363 if (udev_is_available()) {
364 if (udev_wait_for_events(*delay_for_event
) == UDEV_STATUS_ERROR
)
365 pr_err("Error while waiting for udev events.\n");
369 wait_for_events_mdstat(delay_for_event
, c_delay
);
373 * wait_for_events_mdstat() - Waits for events on mdstat.
374 * @delay_for_event: pointer to current event delay
375 * @c_delay: delay from config
377 static void wait_for_events_mdstat(int *delay_for_event
, int c_delay
)
379 int wait_result
= mdstat_wait(*delay_for_event
);
381 if (wait_result
< 0) {
382 pr_err("Error while waiting for events on mdstat.\n");
387 * Give chance to process new device
389 if (wait_result
!= 0) {
390 if (c_delay
> FALLBACK_DELAY
)
391 *delay_for_event
= FALLBACK_DELAY
;
393 *delay_for_event
= c_delay
;
398 static int make_daemon(char *pidfile
)
401 * -1 in the forked daemon
404 * so a non-negative becomes the exit code.
411 FILE *pid_file
= NULL
;
412 int fd
= open(pidfile
, O_WRONLY
| O_CREAT
| O_TRUNC
,
415 pid_file
= fdopen(fd
, "w");
417 perror("cannot create pid file");
419 fprintf(pid_file
,"%d\n", pid
);
435 * check_one_sharer() - Checks for other mdmon processes running.
438 * 0 - no other processes running,
440 * 2 - error, or when scan mode is enabled, and one mdmon process already exists
442 static int check_one_sharer(int scan
)
446 char comm_path
[PATH_MAX
];
447 char comm
[TASK_COMM_LEN
];
449 if (!is_directory(MDMON_DIR
)) {
450 pr_err("%s is not a regular directory.\n", MDMON_DIR
);
454 fp
= fopen(AUTOREBUILD_PID_PATH
, "r");
456 /* PID file does not exist */
460 pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH
);
464 if (!is_file(AUTOREBUILD_PID_PATH
)) {
465 pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH
);
470 if (fscanf(fp
, "%d", &pid
) != 1) {
471 pr_err("Cannot read pid from %s file.\n", AUTOREBUILD_PID_PATH
);
476 snprintf(comm_path
, sizeof(comm_path
), "/proc/%d/comm", pid
);
478 comm_fp
= fopen(comm_path
, "r");
480 dprintf("Warning: Cannot open %s, continuing\n", comm_path
);
485 if (fscanf(comm_fp
, "%15s", comm
) == 0) {
486 dprintf("Warning: Cannot read comm from %s, continuing\n", comm_path
);
492 if (strncmp(basename(comm
), Name
, strlen(Name
)) == 0) {
494 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
499 pr_err("Warning: One autorebuild process already running.\n");
507 * write_autorebuild_pid() - Writes pid to autorebuild.pid file.
509 * Return: 0 on success, 1 on error
511 static int write_autorebuild_pid(void)
516 if (mkdir(MDMON_DIR
, 0700) < 0 && errno
!= EEXIST
) {
517 pr_err("%s: %s\n", strerror(errno
), MDMON_DIR
);
521 if (!is_directory(MDMON_DIR
)) {
522 pr_err("%s is not a regular directory.\n", MDMON_DIR
);
526 fd
= open(AUTOREBUILD_PID_PATH
, O_WRONLY
| O_CREAT
| O_TRUNC
, 0700);
529 pr_err("Error opening %s file.\n", AUTOREBUILD_PID_PATH
);
533 fp
= fdopen(fd
, "w");
536 pr_err("Error opening fd for %s file.\n", AUTOREBUILD_PID_PATH
);
540 fprintf(fp
, "%d\n", getpid());
546 #define BASE_MESSAGE "%s event detected on md device %s"
547 #define COMPONENT_DEVICE_MESSAGE ", component device %s"
548 #define DESCRIPTION_MESSAGE ": %s"
550 * sprint_event_message() - Writes basic message about detected event to destination ptr.
551 * @dest: message destination, should be at least the size of BUFSIZ
554 * Return: 0 on success, 1 on error
556 static int sprint_event_message(char *dest
, const struct event_data
*data
)
561 if (data
->disc
&& data
->description
)
562 snprintf(dest
, BUFSIZ
, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE DESCRIPTION_MESSAGE
,
563 data
->event_name
, data
->dev
, data
->disc
, data
->description
);
565 snprintf(dest
, BUFSIZ
, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE
,
566 data
->event_name
, data
->dev
, data
->disc
);
567 else if (data
->description
)
568 snprintf(dest
, BUFSIZ
, BASE_MESSAGE DESCRIPTION_MESSAGE
,
569 data
->event_name
, data
->dev
, data
->description
);
571 snprintf(dest
, BUFSIZ
, BASE_MESSAGE
, data
->event_name
, data
->dev
);
577 * get_syslog_event_priority() - Determines event priority.
578 * @event_enum: event to be checked
580 * Return: LOG_CRIT, LOG_WARNING or LOG_INFO
582 static int get_syslog_event_priority(const enum event event_enum
)
584 if (event_enum
> __SYSLOG_PRIORITY_CRITICAL
)
586 if (event_enum
> __SYSLOG_PRIORITY_WARNING
)
592 * is_email_event() - Determines whether email for event should be sent or not.
593 * @event_enum: event to be checked
595 * Return: true if email should be sent, false otherwise
597 static bool is_email_event(const enum event event_enum
)
599 static const enum event email_events
[] = {
602 EVENT_DEGRADED_ARRAY
,
603 EVENT_SPARES_MISSING
,
608 for (i
= 0; i
< ARRAY_SIZE(email_events
); ++i
) {
609 if (event_enum
== email_events
[i
])
616 * execute_alert_cmd() - Forks and executes command provided as alert_cmd.
619 static void execute_alert_cmd(const struct event_data
*data
)
625 waitpid(pid
, NULL
, 0);
628 pr_err("Cannot fork to execute alert command");
631 execl(info
.alert_cmd
, info
.alert_cmd
, data
->event_name
, data
->dev
, data
->disc
, NULL
);
637 * send_event_email() - Sends an email about event detected by monitor.
640 static void send_event_email(const struct event_data
*data
)
646 mp
= popen(Sendmail
, "w");
648 pr_err("Cannot open pipe stream for sendmail.\n");
652 signal(SIGPIPE
, SIG_IGN
);
654 fprintf(mp
, "From: %s\n", info
.mailfrom
);
656 fprintf(mp
, "From: %s monitoring <root>\n", Name
);
657 fprintf(mp
, "To: %s\n", info
.mailaddr
);
658 fprintf(mp
, "Subject: %s event on %s:%s\n\n", data
->event_name
, data
->dev
, info
.hostname
);
659 fprintf(mp
, "This is an automatically generated mail message.\n");
660 fprintf(mp
, "%s\n", data
->message
);
662 mdstat
= fopen("/proc/mdstat", "r");
664 pr_err("Cannot open /proc/mdstat\n");
669 fprintf(mp
, "The /proc/mdstat file currently contains the following:\n\n");
670 while ((n
= fread(buf
, 1, sizeof(buf
), mdstat
)) > 0)
671 n
= fwrite(buf
, 1, n
, mp
);
677 * log_event_to_syslog() - Logs an event into syslog.
680 static void log_event_to_syslog(const struct event_data
*data
)
684 priority
= get_syslog_event_priority(data
->event_enum
);
686 syslog(priority
, "%s\n", data
->message
);
690 * alert() - Alerts about the monitor event.
691 * @event_enum: event to be sent
692 * @description: event description
693 * @progress: rebuild progress
694 * @dev: md device name
695 * @disc: component device
697 * If needed function executes alert command, sends an email or logs event to syslog.
699 static void alert(const enum event event_enum
, const char *description
, const uint8_t progress
,
700 const char *dev
, const char *disc
)
702 struct event_data data
= {.dev
= dev
, .disc
= disc
, .description
= description
};
707 if (event_enum
== EVENT_REBUILD
) {
708 snprintf(data
.event_name
, sizeof(data
.event_name
), "%s%02d",
709 map_num_s(events_map
, EVENT_REBUILD
), progress
);
711 snprintf(data
.event_name
, sizeof(data
.event_name
), "%s", map_num_s(events_map
, event_enum
));
714 data
.event_enum
= event_enum
;
716 if (sprint_event_message(data
.message
, &data
) != 0) {
717 pr_err("Cannot create event message.\n");
720 pr_err("%s\n", data
.message
);
723 execute_alert_cmd(&data
);
725 if (info
.mailaddr
&& is_email_event(event_enum
))
726 send_event_email(&data
);
729 log_event_to_syslog(&data
);
732 static int check_array(struct state
*st
, struct mdstat_ent
*mdstat
,
733 int increments
, char *prefer
)
735 /* Update the state 'st' to reflect any changes shown in mdstat,
736 * or found by directly examining the array, and return
737 * '1' if the array is degraded, or '0' if it is optimal (or dead).
739 struct { int state
, major
, minor
; } disks_info
[MAX_DISKS
];
740 struct mdinfo
*sra
= NULL
;
741 mdu_array_info_t array
;
742 struct mdstat_ent
*mse
= NULL
, *mse2
;
743 char *dev
= st
->devname
;
750 int is_container
= 0;
751 unsigned long redundancy_only_flags
= 0;
754 alert(EVENT_TEST_MESSAGE
, NULL
, 0, dev
, NULL
);
758 fd
= open(dev
, O_RDONLY
);
762 if (st
->devnm
[0] == 0)
763 snprintf(st
->devnm
, MD_NAME_MAX
, "%s", fd2devnm(fd
));
765 for (mse2
= mdstat
; mse2
; mse2
= mse2
->next
)
766 if (strcmp(mse2
->devnm
, st
->devnm
) == 0) {
767 mse2
->devnm
[0] = 0; /* flag it as "used" */
772 /* duplicated array in statelist
773 * or re-created after reading mdstat
779 if (mse
->level
== NULL
)
782 if (!is_container
&& !md_array_active(fd
))
785 fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
786 if (md_get_array_info(fd
, &array
) < 0)
789 if (!is_container
&& map_name(pers
, mse
->level
) > 0)
790 redundancy_only_flags
|= GET_MISMATCH
;
792 sra
= sysfs_read(-1, st
->devnm
, GET_LEVEL
| GET_DISKS
| GET_DEVS
|
793 GET_STATE
| redundancy_only_flags
);
798 /* It's much easier to list what array levels can't
799 * have a device disappear than all of them that can
801 if (sra
->array
.level
== 0 || sra
->array
.level
== -1) {
802 if (!st
->err
&& !st
->from_config
)
803 alert(EVENT_DEVICE_DISAPPEARED
, "Wrong-Level", 0, dev
, NULL
);
808 /* this array is in /proc/mdstat */
809 if (array
.utime
== 0)
810 /* external arrays don't update utime, so
811 * just make sure it is always different. */
812 array
.utime
= st
->utime
+ 1;;
815 /* New array appeared where previously had an error */
817 st
->percent
= RESYNC_NONE
;
820 alert(EVENT_NEW_ARRAY
, NULL
, 0, st
->devname
, NULL
);
823 if (st
->utime
== array
.utime
&& st
->failed
== sra
->array
.failed_disks
&&
824 st
->working
== sra
->array
.working_disks
&&
825 st
->spare
== sra
->array
.spare_disks
&&
826 (mse
== NULL
|| (mse
->percent
== st
->percent
))) {
827 if ((st
->active
< st
->raid
) && st
->spare
== 0)
831 if (st
->utime
== 0 && /* new array */
832 mse
->pattern
&& strchr(mse
->pattern
, '_') /* degraded */)
833 alert(EVENT_DEGRADED_ARRAY
, NULL
, 0, dev
, NULL
);
835 if (st
->utime
== 0 && /* new array */ st
->expected_spares
> 0 &&
836 sra
->array
.spare_disks
< st
->expected_spares
)
837 alert(EVENT_SPARES_MISSING
, NULL
, 0, dev
, NULL
);
838 if (st
->percent
< 0 && st
->percent
!= RESYNC_UNKNOWN
&&
840 alert(EVENT_REBUILD_STARTED
, NULL
, 0, dev
, NULL
);
841 if (st
->percent
>= 0 && mse
->percent
>= 0 &&
842 (mse
->percent
/ increments
) > (st
->percent
/ increments
)) {
843 if((mse
->percent
/ increments
) == 0)
844 alert(EVENT_REBUILD_STARTED
, NULL
, 0, dev
, NULL
);
846 alert(EVENT_REBUILD
, NULL
, mse
->percent
, dev
, NULL
);
849 if (mse
->percent
== RESYNC_NONE
&& st
->percent
>= 0) {
850 /* Rebuild/sync/whatever just finished.
851 * If there is a number in /mismatch_cnt,
852 * we should report that.
854 if (sra
&& sra
->mismatch_cnt
> 0) {
856 snprintf(cnt
, sizeof(cnt
),
857 " mismatches found: %d (on raid level %d)",
858 sra
->mismatch_cnt
, sra
->array
.level
);
859 alert(EVENT_REBUILD_FINISHED
, NULL
, 0, dev
, cnt
);
861 alert(EVENT_REBUILD_FINISHED
, NULL
, 0, dev
, NULL
);
863 st
->percent
= mse
->percent
;
865 remaining_disks
= sra
->array
.nr_disks
;
866 for (i
= 0; i
< MAX_DISKS
&& remaining_disks
> 0; i
++) {
867 mdu_disk_info_t disc
;
869 if (md_get_disk_info(fd
, &disc
) >= 0) {
870 disks_info
[i
].state
= disc
.state
;
871 disks_info
[i
].major
= disc
.major
;
872 disks_info
[i
].minor
= disc
.minor
;
873 if (disc
.major
|| disc
.minor
)
876 disks_info
[i
].major
= disks_info
[i
].minor
= 0;
880 if (mse
->metadata_version
&&
881 strncmp(mse
->metadata_version
, "external:", 9) == 0 &&
882 is_subarray(mse
->metadata_version
+9)) {
884 snprintf(st
->parent_devnm
, MD_NAME_MAX
, "%s", mse
->metadata_version
+ 10);
885 sl
= strchr(st
->parent_devnm
, '/');
889 st
->parent_devnm
[0] = 0;
890 if (st
->metadata
== NULL
&& st
->parent_devnm
[0] == 0)
891 st
->metadata
= super_by_fd(fd
, NULL
);
893 for (i
= 0; i
< MAX_DISKS
; i
++) {
894 mdu_disk_info_t disc
= {0, 0, 0, 0, 0};
899 if (i
< last_disk
&& (disks_info
[i
].major
|| disks_info
[i
].minor
)) {
900 newstate
= disks_info
[i
].state
;
901 dv
= map_dev_preferred(disks_info
[i
].major
, disks_info
[i
].minor
, 1,
903 disc
.state
= newstate
;
904 disc
.major
= disks_info
[i
].major
;
905 disc
.minor
= disks_info
[i
].minor
;
907 newstate
= (1 << MD_DISK_REMOVED
);
909 if (dv
== NULL
&& st
->devid
[i
])
910 dv
= map_dev_preferred(major(st
->devid
[i
]),
911 minor(st
->devid
[i
]), 1, prefer
);
912 change
= newstate
^ st
->devstate
[i
];
913 if (st
->utime
&& change
&& !st
->err
&& !new_array
) {
914 if ((st
->devstate
[i
]&change
) & (1 << MD_DISK_SYNC
))
915 alert(EVENT_FAIL
, NULL
, 0, dev
, dv
);
916 else if ((newstate
& (1 << MD_DISK_FAULTY
)) &&
917 (disc
.major
|| disc
.minor
) &&
918 st
->devid
[i
] == makedev(disc
.major
,
920 alert(EVENT_FAIL_SPARE
, NULL
, 0, dev
, dv
);
921 else if ((newstate
&change
) & (1 << MD_DISK_SYNC
))
922 alert(EVENT_SPARE_ACTIVE
, NULL
, 0, dev
, dv
);
924 st
->devstate
[i
] = newstate
;
925 st
->devid
[i
] = makedev(disc
.major
, disc
.minor
);
927 st
->active
= sra
->array
.active_disks
;
928 st
->working
= sra
->array
.working_disks
;
929 st
->spare
= sra
->array
.spare_disks
;
930 st
->failed
= sra
->array
.failed_disks
;
931 st
->utime
= array
.utime
;
932 st
->raid
= sra
->array
.raid_disks
;
934 if ((st
->active
< st
->raid
) && st
->spare
== 0)
945 if (!st
->err
&& !is_container
)
946 alert(EVENT_DEVICE_DISAPPEARED
, NULL
, 0, dev
, NULL
);
951 static int add_new_arrays(struct mdstat_ent
*mdstat
, struct state
**statelist
)
953 struct mdstat_ent
*mse
;
957 for (mse
= mdstat
; mse
; mse
= mse
->next
)
958 if (mse
->devnm
[0] && (!mse
->level
|| /* retrieve containers */
959 (strcmp(mse
->level
, "raid0") != 0 &&
960 strcmp(mse
->level
, "linear") != 0))) {
961 struct state
*st
= xcalloc(1, sizeof *st
);
962 mdu_array_info_t array
;
965 name
= get_md_name(mse
->devnm
);
971 snprintf(st
->devname
, MD_NAME_MAX
+ sizeof(DEV_MD_DIR
), "%s", name
);
972 if ((fd
= open(st
->devname
, O_RDONLY
)) < 0 ||
973 md_get_array_info(fd
, &array
) < 0) {
977 put_md_name(st
->devname
);
979 st
->metadata
->ss
->free_super(st
->metadata
);
986 st
->next
= *statelist
;
989 snprintf(st
->devnm
, MD_NAME_MAX
, "%s", mse
->devnm
);
990 st
->percent
= RESYNC_UNKNOWN
;
991 st
->expected_spares
= -1;
992 if (mse
->metadata_version
&&
993 strncmp(mse
->metadata_version
,
994 "external:", 9) == 0 &&
995 is_subarray(mse
->metadata_version
+9)) {
997 snprintf(st
->parent_devnm
, MD_NAME_MAX
,
998 "%s", mse
->metadata_version
+ 10);
999 sl
= strchr(st
->parent_devnm
, '/');
1002 st
->parent_devnm
[0] = 0;
1005 alert(EVENT_TEST_MESSAGE
, NULL
, 0, st
->devname
, NULL
);
1011 static int check_donor(struct state
*from
, struct state
*to
)
1018 /* Cannot move from a member */
1022 for (sub
= from
->subarray
; sub
; sub
= sub
->subarray
)
1023 /* If source array has degraded subarrays, don't
1026 if (sub
->active
< sub
->raid
)
1028 if (from
->metadata
->ss
->external
== 0)
1029 if (from
->active
< from
->raid
)
1031 if (from
->spare
<= 0)
1036 static dev_t
choose_spare(struct state
*from
, struct state
*to
,
1037 struct domainlist
*domlist
, struct spare_criteria
*sc
)
1042 for (d
= from
->raid
; !dev
&& d
< MAX_DISKS
; d
++) {
1043 if (from
->devid
[d
] > 0 && from
->devstate
[d
] == 0) {
1044 struct dev_policy
*pol
;
1046 if (to
->metadata
->ss
->external
&&
1047 test_partition_from_id(from
->devid
[d
]))
1050 if (devid_matches_criteria(from
->devid
[d
], sc
) == false)
1053 pol
= devid_policy(from
->devid
[d
]);
1054 if (from
->spare_group
)
1055 pol_add(&pol
, pol_domain
,
1056 from
->spare_group
, NULL
);
1057 if (domain_test(domlist
, pol
,
1058 to
->metadata
->ss
->name
) == 1)
1059 dev
= from
->devid
[d
];
1060 dev_policy_free(pol
);
1066 static dev_t
container_choose_spare(struct state
*from
, struct state
*to
,
1067 struct domainlist
*domlist
,
1068 struct spare_criteria
*sc
, int active
)
1070 /* This is similar to choose_spare, but we cannot trust devstate,
1071 * so we need to read the metadata instead
1073 struct mdinfo
*list
;
1074 struct supertype
*st
= from
->metadata
;
1075 int fd
= open(from
->devname
, O_RDONLY
);
1081 if (!st
->ss
->getinfo_super_disks
) {
1086 err
= st
->ss
->load_container(st
, fd
, NULL
);
1092 /* We must check if number of active disks has not increased
1093 * since ioctl in main loop. mdmon may have added spare
1094 * to subarray. If so we do not need to look for more spares
1095 * so return non zero value */
1098 list
= st
->ss
->getinfo_super_disks(st
);
1100 st
->ss
->free_super(st
);
1105 if (dp
->disk
.state
& (1 << MD_DISK_SYNC
) &&
1106 !(dp
->disk
.state
& (1 << MD_DISK_FAULTY
)))
1111 if (active
< active_cnt
) {
1112 /* Spare just activated.*/
1113 st
->ss
->free_super(st
);
1118 /* We only need one spare so full list not needed */
1119 list
= container_choose_spares(st
, sc
, domlist
, from
->spare_group
,
1120 to
->metadata
->ss
->name
, 1);
1122 struct mdinfo
*disks
= list
->devs
;
1124 dev
= makedev(disks
->disk
.major
, disks
->disk
.minor
);
1127 st
->ss
->free_super(st
);
1131 static void try_spare_migration(struct state
*statelist
)
1136 link_containers_with_subarrays(statelist
);
1137 for (st
= statelist
; st
; st
= st
->next
)
1138 if (st
->active
< st
->raid
&& st
->spare
== 0 && !st
->err
) {
1139 struct domainlist
*domlist
= NULL
;
1140 struct spare_criteria sc
= {0};
1142 struct state
*to
= st
;
1144 if (to
->parent_devnm
[0] && !to
->parent
)
1145 /* subarray monitored without parent container
1146 * we can't move spares here */
1150 /* member of a container */
1153 if (to
->metadata
->ss
->get_spare_criteria
)
1154 if (to
->metadata
->ss
->get_spare_criteria(to
->metadata
, to
->devname
,
1158 if (to
->metadata
->ss
->external
) {
1159 /* We must make sure there is
1160 * no suitable spare in container already.
1161 * If there is we don't add more */
1162 dev_t devid
= container_choose_spare(
1163 to
, to
, NULL
, &sc
, st
->active
);
1167 for (d
= 0; d
< MAX_DISKS
; d
++)
1169 domainlist_add_dev(&domlist
,
1171 to
->metadata
->ss
->name
);
1172 if (to
->spare_group
)
1173 domain_add(&domlist
, to
->spare_group
);
1175 * No spare migration if the destination
1176 * has no domain. Skip this array.
1180 for (from
=statelist
; from
; from
=from
->next
) {
1182 if (!check_donor(from
, to
))
1184 if (from
->metadata
->ss
->external
)
1185 devid
= container_choose_spare(
1186 from
, to
, domlist
, &sc
, 0);
1188 devid
= choose_spare(from
, to
, domlist
,
1191 move_spare(from
->devname
, to
->devname
,
1193 alert(EVENT_MOVE_SPARE
, NULL
, 0, to
->devname
, from
->devname
);
1197 domain_free(domlist
);
1201 /* search the statelist to connect external
1202 * metadata subarrays with their containers
1203 * We always completely rebuild the tree from scratch as
1204 * that is safest considering the possibility of entries
1205 * disappearing or changing.
1207 static void link_containers_with_subarrays(struct state
*list
)
1211 for (st
= list
; st
; st
= st
->next
) {
1213 st
->subarray
= NULL
;
1215 for (st
= list
; st
; st
= st
->next
)
1216 if (st
->parent_devnm
[0])
1217 for (cont
= list
; cont
; cont
= cont
->next
)
1218 if (!cont
->err
&& cont
->parent_devnm
[0] == 0 &&
1219 strcmp(cont
->devnm
, st
->parent_devnm
) == 0) {
1221 st
->subarray
= cont
->subarray
;
1222 cont
->subarray
= st
;
1228 * free_statelist() - Frees statelist.
1229 * @statelist: statelist to free
1231 static void free_statelist(struct state
*statelist
)
1233 struct state
*tmp
= NULL
;
1236 if (statelist
->spare_group
)
1237 free(statelist
->spare_group
);
1240 statelist
= statelist
->next
;
1245 /* Not really Monitor but ... */
1252 int frozen_remaining
= 3;
1254 if (!stat_is_blkdev(dev
, &rdev
))
1257 tmp
= devid2devnm(rdev
);
1259 pr_err("Cannot get md device name.\n");
1266 struct mdstat_ent
*ms
= mdstat_read(1, 0);
1267 struct mdstat_ent
*e
;
1269 for (e
= ms
; e
; e
= e
->next
)
1270 if (strcmp(e
->devnm
, devnm
) == 0)
1273 if (e
&& e
->percent
== RESYNC_NONE
) {
1274 /* We could be in the brief pause before something
1275 * starts. /proc/mdstat doesn't show that, but
1279 char buf
[SYSFS_MAX_BUF_SIZE
];
1281 if (sysfs_init(&mdi
, -1, devnm
))
1283 if (sysfs_get_str(&mdi
, NULL
, "sync_action",
1284 buf
, sizeof(buf
)) > 0 &&
1285 strcmp(buf
,"idle\n") != 0) {
1286 e
->percent
= RESYNC_UNKNOWN
;
1287 if (strcmp(buf
, "frozen\n") == 0) {
1288 if (frozen_remaining
== 0)
1289 e
->percent
= RESYNC_NONE
;
1291 frozen_remaining
-= 1;
1295 if (!e
|| e
->percent
== RESYNC_NONE
) {
1296 if (e
&& e
->metadata_version
&&
1297 strncmp(e
->metadata_version
, "external:", 9) == 0) {
1298 if (is_subarray(&e
->metadata_version
[9]))
1299 ping_monitor(&e
->metadata_version
[9]);
1301 ping_monitor(devnm
);
1312 /* The state "broken" is used only for RAID0/LINEAR - it's the same as
1313 * "clean", but used in case the array has one or more members missing.
1315 static char *clean_states
[] = {
1316 "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL
};
1318 int WaitClean(char *dev
, int verbose
)
1325 if (!stat_is_blkdev(dev
, NULL
))
1327 fd
= open(dev
, O_RDONLY
);
1330 pr_err("Couldn't open %s: %s\n", dev
, strerror(errno
));
1334 strcpy(devnm
, fd2devnm(fd
));
1335 mdi
= sysfs_read(fd
, devnm
, GET_VERSION
|GET_LEVEL
|GET_SAFEMODE
);
1338 pr_err("Failed to read sysfs attributes for %s\n", dev
);
1343 switch(mdi
->array
.level
) {
1345 case LEVEL_MULTIPATH
:
1347 /* safemode delay is irrelevant for these levels */
1351 /* for internal metadata the kernel handles the final clean
1352 * transition, containers can never be dirty
1354 if (!is_subarray(mdi
->text_version
))
1357 /* safemode disabled ? */
1358 if (mdi
->safe_mode_delay
== 0)
1362 int state_fd
= sysfs_open(fd2devnm(fd
), NULL
, "array_state");
1363 char buf
[SYSFS_MAX_BUF_SIZE
];
1366 /* minimize the safe_mode_delay and prepare to wait up to 5s
1367 * for writes to quiesce
1369 sysfs_set_safemode(mdi
, 1);
1371 /* wait for array_state to be clean */
1373 rv
= read(state_fd
, buf
, sizeof(buf
));
1376 if (sysfs_match_word(buf
, clean_states
) <
1377 (int)ARRAY_SIZE(clean_states
) - 1)
1379 rv
= sysfs_wait(state_fd
, &delay
);
1380 if (rv
< 0 && errno
!= EINTR
)
1382 lseek(state_fd
, 0, SEEK_SET
);
1386 else if (ping_monitor(mdi
->text_version
) == 0) {
1387 /* we need to ping to close the window between array
1388 * state transitioning to clean and the metadata being
1394 pr_err("Error connecting monitor with %s\n", dev
);
1397 pr_err("Error waiting for %s to be clean\n", dev
);
1399 /* restore the original safe_mode_delay */
1400 sysfs_set_safemode(mdi
, mdi
->safe_mode_delay
);