]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
Mdmonitor: Add helper functions
[thirdparty/mdadm.git] / Monitor.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_p.h"
27 #include "md_u.h"
28 #include <sys/wait.h>
29 #include <limits.h>
30 #include <syslog.h>
31 #ifndef NO_LIBUDEV
32 #include <libudev.h>
33 #endif
34
/* Capacity in bytes of event_data.event_name, terminating byte included. */
#define EVENT_NAME_MAX 32
36
37 struct state {
38 char devname[MD_NAME_MAX + sizeof("/dev/md/")]; /* length of "/dev/md/" + device name + terminating byte*/
39 char devnm[MD_NAME_MAX]; /* to sync with mdstat info */
40 unsigned int utime;
41 int err;
42 char *spare_group;
43 int active, working, failed, spare, raid;
44 int from_config;
45 int from_auto;
46 int expected_spares;
47 int devstate[MAX_DISKS];
48 dev_t devid[MAX_DISKS];
49 int percent;
50 char parent_devnm[MD_NAME_MAX]; /* For subarray, devnm of parent.
51 * For others, ""
52 */
53 struct supertype *metadata;
54 struct state *subarray;/* for a container it is a link to first subarray
55 * for a subarray it is a link to next subarray
56 * in the same container */
57 struct state *parent; /* for a subarray it is a link to its container
58 */
59 struct state *next;
60 };
61
62 struct alert_info {
63 char hostname[HOST_NAME_MAX];
64 char *mailaddr;
65 char *mailfrom;
66 char *alert_cmd;
67 int dosyslog;
68 int test;
69 } info;
70
/*
 * enum event - events the monitor can report.
 *
 * The order matters: get_syslog_event_priority() compares an event against
 * the two __SYSLOG_PRIORITY_* markers, so every event is logged with the
 * priority of the closest marker that precedes it (LOG_INFO before the
 * first marker).
 */
enum event {
	/* logged as LOG_INFO */
	EVENT_SPARE_ACTIVE = 0,
	EVENT_NEW_ARRAY,
	EVENT_MOVE_SPARE,
	EVENT_TEST_MESSAGE,

	/* marker: events below are logged as LOG_WARNING */
	__SYSLOG_PRIORITY_WARNING,
	EVENT_REBUILD_STARTED,
	EVENT_REBUILD,
	EVENT_REBUILD_FINISHED,
	EVENT_SPARES_MISSING,

	/* marker: events below are logged as LOG_CRIT */
	__SYSLOG_PRIORITY_CRITICAL,
	EVENT_DEVICE_DISAPPEARED,
	EVENT_FAIL,
	EVENT_FAIL_SPARE,
	EVENT_DEGRADED_ARRAY,
	EVENT_UNKNOWN
};
88
89 mapping_t events_map[] = {
90 {"SpareActive", EVENT_SPARE_ACTIVE},
91 {"NewArray", EVENT_NEW_ARRAY},
92 {"MoveSpare", EVENT_MOVE_SPARE},
93 {"TestMessage", EVENT_TEST_MESSAGE},
94 {"RebuildStarted", EVENT_REBUILD_STARTED},
95 {"Rebuild", EVENT_REBUILD},
96 {"RebuildFinished", EVENT_REBUILD_FINISHED},
97 {"SparesMissing", EVENT_SPARES_MISSING},
98 {"DeviceDisappeared", EVENT_DEVICE_DISAPPEARED},
99 {"Fail", EVENT_FAIL},
100 {"FailSpare", EVENT_FAIL_SPARE},
101 {"DegradedArray", EVENT_DEGRADED_ARRAY},
102 {NULL, EVENT_UNKNOWN}
103 };
104
105 struct event_data {
106 enum event event_enum;
107 /*
108 * @event_name: Rebuild event name must be in form "RebuildXX", where XX is rebuild progress.
109 */
110 char event_name[EVENT_NAME_MAX];
111 char message[BUFSIZ];
112 const char *description;
113 const char *dev;
114 const char *disc;
115 };
116
/*
 * Forward declarations of the file-local helpers that are used before their
 * definition. Each helper is declared exactly once.
 */
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist);
static int check_array(struct state *st, struct mdstat_ent *mdstat, int increments, char *prefer);
static int check_one_sharer(int scan);
#ifndef NO_LIBUDEV
static int check_udev_activity(void);
#endif
static void free_statelist(struct state *statelist);
static void link_containers_with_subarrays(struct state *list);
static int make_daemon(char *pidfile);
static void try_spare_migration(struct state *statelist);
static void write_autorebuild_pid(void);
130
131 int Monitor(struct mddev_dev *devlist,
132 char *mailaddr, char *alert_cmd,
133 struct context *c,
134 int daemonise, int oneshot,
135 int dosyslog, char *pidfile, int increments,
136 int share)
137 {
138 /*
139 * Every few seconds, scan every md device looking for changes
140 * When a change is found, log it, possibly run the alert command,
141 * and possibly send Email
142 *
143 * For each array, we record:
144 * Update time
145 * active/working/failed/spare drives
146 * State of each device.
147 * %rebuilt if rebuilding
148 *
149 * If the update time changes, check out all the data again
150 * It is possible that we cannot get the state of each device
151 * due to bugs in the md kernel module.
152 * We also read /proc/mdstat to get rebuild percent,
153 * and to get state on all active devices incase of kernel bug.
154 *
155 * Events are:
156 * Fail
157 * An active device had Faulty set or Active/Sync removed
158 * FailSpare
159 * A spare device had Faulty set
160 * SpareActive
161 * An active device had a reverse transition
162 * RebuildStarted
163 * percent went from -1 to +ve
164 * RebuildNN
165 * percent went from below to not-below NN%
166 * DeviceDisappeared
167 * Couldn't access a device which was previously visible
168 *
169 * if we detect an array with active<raid and spare==0
170 * we look at other arrays that have same spare-group
171 * If we find one with active==raid and spare>0,
172 * and if we can get_disk_info and find a name
173 * Then we hot-remove and hot-add to the other array
174 *
175 * If devlist is NULL, then we can monitor everything if --scan
176 * was given. We get an initial list from config file and add anything
177 * that appears in /proc/mdstat
178 */
179
180 struct state *statelist = NULL;
181 int finished = 0;
182 struct mdstat_ent *mdstat = NULL;
183 char *mailfrom;
184 struct mddev_ident *mdlist;
185 int delay_for_event = c->delay;
186
187 if (devlist && c->scan) {
188 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
189 return 1;
190 }
191
192 if (!mailaddr)
193 mailaddr = conf_get_mailaddr();
194
195 if (!alert_cmd)
196 alert_cmd = conf_get_program();
197
198 mailfrom = conf_get_mailfrom();
199
200 if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
201 pr_err("No mail address or alert command - not monitoring.\n");
202 return 1;
203 }
204
205 if (c->verbose) {
206 pr_err("Monitor is started with delay %ds\n", c->delay);
207 if (mailaddr)
208 pr_err("Monitor using email address %s\n", mailaddr);
209 if (alert_cmd)
210 pr_err("Monitor using program %s\n", alert_cmd);
211 }
212
213 info.alert_cmd = alert_cmd;
214 info.mailaddr = mailaddr;
215 info.mailfrom = mailfrom;
216 info.dosyslog = dosyslog;
217 info.test = c->test;
218
219 if (gethostname(info.hostname, sizeof(info.hostname)) != 0) {
220 pr_err("Cannot get hostname.\n");
221 return 1;
222 }
223 info.hostname[sizeof(info.hostname) - 1] = '\0';
224
225 if (share){
226 if (check_one_sharer(c->scan))
227 return 1;
228 }
229
230 if (daemonise) {
231 int rv = make_daemon(pidfile);
232 if (rv >= 0)
233 return rv;
234 }
235
236 if (share)
237 write_autorebuild_pid();
238
239 if (devlist == NULL) {
240 mdlist = conf_get_ident(NULL);
241 for (; mdlist; mdlist = mdlist->next) {
242 struct state *st;
243
244 if (mdlist->devname == NULL)
245 continue;
246 if (strcasecmp(mdlist->devname, "<ignore>") == 0)
247 continue;
248 if (!is_mddev(mdlist->devname))
249 continue;
250
251 st = xcalloc(1, sizeof *st);
252 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"),
253 "/dev/md/%s", basename(mdlist->devname));
254 st->next = statelist;
255 st->devnm[0] = 0;
256 st->percent = RESYNC_UNKNOWN;
257 st->from_config = 1;
258 st->expected_spares = mdlist->spare_disks;
259 if (mdlist->spare_group)
260 st->spare_group = xstrdup(mdlist->spare_group);
261 statelist = st;
262 }
263 } else {
264 struct mddev_dev *dv;
265
266 for (dv = devlist; dv; dv = dv->next) {
267 struct state *st;
268
269 if (!is_mddev(dv->devname))
270 continue;
271
272 st = xcalloc(1, sizeof *st);
273 mdlist = conf_get_ident(dv->devname);
274 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", dv->devname);
275 st->next = statelist;
276 st->devnm[0] = 0;
277 st->percent = RESYNC_UNKNOWN;
278 st->expected_spares = -1;
279 if (mdlist) {
280 st->expected_spares = mdlist->spare_disks;
281 if (mdlist->spare_group)
282 st->spare_group = xstrdup(mdlist->spare_group);
283 }
284 statelist = st;
285 }
286 }
287
288 while (!finished) {
289 int new_found = 0;
290 struct state *st, **stp;
291 int anydegraded = 0;
292 int anyredundant = 0;
293
294 if (mdstat)
295 free_mdstat(mdstat);
296 mdstat = mdstat_read(oneshot ? 0 : 1, 0);
297
298 for (st = statelist; st; st = st->next) {
299 if (check_array(st, mdstat, increments, c->prefer))
300 anydegraded = 1;
301 /* for external arrays, metadata is filled for
302 * containers only
303 */
304 if (st->metadata && st->metadata->ss->external)
305 continue;
306 if (st->err == 0 && !anyredundant)
307 anyredundant = 1;
308 }
309
310 /* now check if there are any new devices found in mdstat */
311 if (c->scan)
312 new_found = add_new_arrays(mdstat, &statelist);
313
314 /* If an array has active < raid && spare == 0 && spare_group != NULL
315 * Look for another array with spare > 0 and active == raid and same spare_group
316 * if found, choose a device and hotremove/hotadd
317 */
318 if (share && anydegraded)
319 try_spare_migration(statelist);
320 if (!new_found) {
321 if (oneshot)
322 break;
323 else if (!anyredundant) {
324 pr_err("No array with redundancy detected, stopping\n");
325 break;
326 }
327 else {
328 #ifndef NO_LIBUDEV
329 /*
330 * Wait for udevd to finish new devices
331 * processing.
332 */
333 if (mdstat_wait(delay_for_event) &&
334 check_udev_activity())
335 pr_err("Error while waiting for UDEV to complete new devices processing\n");
336 #else
337 int wait_result = mdstat_wait(delay_for_event);
338 /*
339 * Give chance to process new device
340 */
341 if (wait_result != 0) {
342 if (c->delay > 5)
343 delay_for_event = 5;
344 } else
345 delay_for_event = c->delay;
346 #endif
347 mdstat_close();
348 }
349 }
350 info.test = 0;
351
352 for (stp = &statelist; (st = *stp) != NULL; ) {
353 if (st->from_auto && st->err > 5) {
354 *stp = st->next;
355 if (st->spare_group)
356 free(st->spare_group);
357
358 free(st);
359 } else
360 stp = &st->next;
361 }
362 }
363
364 free_statelist(statelist);
365
366 if (pidfile)
367 unlink(pidfile);
368 return 0;
369 }
370
/*
 * make_daemon() - fork the monitor into the background.
 * @pidfile: file that receives the pid of the forked child; when NULL the
 *	pid is printed on stdout instead
 *
 * Return:
 *	-1 in the forked daemon
 *	 0 in the parent
 *	 1 on error
 * so a non-negative value becomes the exit code.
 */
static int make_daemon(char *pidfile)
{
	int pid = fork();

	if (pid < 0) {
		perror("daemonise");
		return 1;
	}

	if (pid == 0) {
		/* child: detach from the inherited descriptors and the session */
		manage_fork_fds(0);
		setsid();
		return -1;
	}

	/* parent: publish the pid of the daemon */
	if (!pidfile) {
		printf("%d\n", pid);
	} else {
		FILE *pid_file = NULL;
		int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);

		if (fd >= 0)
			pid_file = fdopen(fd, "w");
		if (!pid_file) {
			/* report first: close() could overwrite errno */
			perror("cannot create pid file");
			/* fdopen() failed, so the descriptor is still ours to release */
			if (fd >= 0)
				close(fd);
		} else {
			fprintf(pid_file, "%d\n", pid);
			fclose(pid_file);
		}
	}
	return 0;
}
406
407 static int check_one_sharer(int scan)
408 {
409 int pid;
410 FILE *comm_fp;
411 FILE *fp;
412 char comm_path[PATH_MAX];
413 char path[PATH_MAX];
414 char comm[20];
415
416 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
417 fp = fopen(path, "r");
418 if (fp) {
419 if (fscanf(fp, "%d", &pid) != 1)
420 pid = -1;
421 snprintf(comm_path, sizeof(comm_path),
422 "/proc/%d/comm", pid);
423 comm_fp = fopen(comm_path, "r");
424 if (comm_fp) {
425 if (fscanf(comm_fp, "%19s", comm) &&
426 strncmp(basename(comm), Name, strlen(Name)) == 0) {
427 if (scan) {
428 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
429 fclose(comm_fp);
430 fclose(fp);
431 return 1;
432 } else {
433 pr_err("Warning: One autorebuild process already running.\n");
434 }
435 }
436 fclose(comm_fp);
437 }
438 fclose(fp);
439 }
440 return 0;
441 }
442
443 static void write_autorebuild_pid()
444 {
445 char path[PATH_MAX];
446 int pid;
447 FILE *fp = NULL;
448 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
449
450 if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
451 pr_err("Can't create autorebuild.pid file\n");
452 } else {
453 int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700);
454
455 if (fd >= 0)
456 fp = fdopen(fd, "w");
457
458 if (!fp)
459 pr_err("Can't create autorebuild.pid file\n");
460 else {
461 pid = getpid();
462 fprintf(fp, "%d\n", pid);
463 fclose(fp);
464 }
465 }
466 }
467
468 #define BASE_MESSAGE "%s event detected on md device %s"
469 #define COMPONENT_DEVICE_MESSAGE ", component device %s"
470 #define DESCRIPTION_MESSAGE ": %s"
471 /*
472 * sprint_event_message() - Writes basic message about detected event to destination ptr.
473 * @dest: message destination, should be at least the size of BUFSIZ
474 * @data: event data
475 *
476 * Return: 0 on success, 1 on error
477 */
478 static int sprint_event_message(char *dest, const struct event_data *data)
479 {
480 if (!dest || !data)
481 return 1;
482
483 if (data->disc && data->description)
484 snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE DESCRIPTION_MESSAGE,
485 data->event_name, data->dev, data->disc, data->description);
486 else if (data->disc)
487 snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE,
488 data->event_name, data->dev, data->disc);
489 else if (data->description)
490 snprintf(dest, BUFSIZ, BASE_MESSAGE DESCRIPTION_MESSAGE,
491 data->event_name, data->dev, data->description);
492 else
493 snprintf(dest, BUFSIZ, BASE_MESSAGE, data->event_name, data->dev);
494
495 return 0;
496 }
497
498 /*
499 * get_syslog_event_priority() - Determines event priority.
500 * @event_enum: event to be checked
501 *
502 * Return: LOG_CRIT, LOG_WARNING or LOG_INFO
503 */
504 static int get_syslog_event_priority(const enum event event_enum)
505 {
506 if (event_enum > __SYSLOG_PRIORITY_CRITICAL)
507 return LOG_CRIT;
508 if (event_enum > __SYSLOG_PRIORITY_WARNING)
509 return LOG_WARNING;
510 return LOG_INFO;
511 }
512
513 /*
514 * is_email_event() - Determines whether email for event should be sent or not.
515 * @event_enum: event to be checked
516 *
517 * Return: true if email should be sent, false otherwise
518 */
519 static bool is_email_event(const enum event event_enum)
520 {
521 static const enum event email_events[] = {
522 EVENT_FAIL,
523 EVENT_FAIL_SPARE,
524 EVENT_DEGRADED_ARRAY,
525 EVENT_SPARES_MISSING,
526 EVENT_TEST_MESSAGE
527 };
528 unsigned int i;
529
530 for (i = 0; i < ARRAY_SIZE(email_events); ++i) {
531 if (event_enum == email_events[i])
532 return true;
533 }
534 return false;
535 }
536
537 /*
538 * execute_alert_cmd() - Forks and executes command provided as alert_cmd.
539 * @data: event data
540 */
541 static void execute_alert_cmd(const struct event_data *data)
542 {
543 int pid = fork();
544
545 switch (pid) {
546 default:
547 waitpid(pid, NULL, 0);
548 break;
549 case -1:
550 pr_err("Cannot fork to execute alert command");
551 break;
552 case 0:
553 execl(info.alert_cmd, info.alert_cmd, data->event_name, data->dev, data->disc, NULL);
554 exit(2);
555 }
556 }
557
558 /*
559 * send_event_email() - Sends an email about event detected by monitor.
560 * @data: event data
561 */
562 static void send_event_email(const struct event_data *data)
563 {
564 FILE *mp, *mdstat;
565 char buf[BUFSIZ];
566 int n;
567
568 mp = popen(Sendmail, "w");
569 if (!mp) {
570 pr_err("Cannot open pipe stream for sendmail.\n");
571 return;
572 }
573
574 signal(SIGPIPE, SIG_IGN);
575 if (info.mailfrom)
576 fprintf(mp, "From: %s\n", info.mailfrom);
577 else
578 fprintf(mp, "From: %s monitoring <root>\n", Name);
579 fprintf(mp, "To: %s\n", info.mailaddr);
580 fprintf(mp, "Subject: %s event on %s:%s\n\n", data->event_name, data->dev, info.hostname);
581 fprintf(mp, "This is an automatically generated mail message.\n");
582 fprintf(mp, "%s\n", data->message);
583
584 mdstat = fopen("/proc/mdstat", "r");
585 if (!mdstat) {
586 pr_err("Cannot open /proc/mdstat\n");
587 pclose(mp);
588 return;
589 }
590
591 fprintf(mp, "The /proc/mdstat file currently contains the following:\n\n");
592 while ((n = fread(buf, 1, sizeof(buf), mdstat)) > 0)
593 n = fwrite(buf, 1, n, mp);
594 fclose(mdstat);
595 pclose(mp);
596 }
597
598 /*
599 * log_event_to_syslog() - Logs an event into syslog.
600 * @data: event data
601 */
602 static void log_event_to_syslog(const struct event_data *data)
603 {
604 int priority;
605
606 priority = get_syslog_event_priority(data->event_enum);
607
608 syslog(priority, "%s\n", data->message);
609 }
610
611 /*
612 * alert() - Alerts about the monitor event.
613 * @event_enum: event to be sent
614 * @description: event description
615 * @progress: rebuild progress
616 * @dev: md device name
617 * @disc: component device
618 *
619 * If needed function executes alert command, sends an email or logs event to syslog.
620 */
621 static void alert(const enum event event_enum, const char *description, const uint8_t progress,
622 const char *dev, const char *disc)
623 {
624 struct event_data data = {.dev = dev, .disc = disc, .description = description};
625
626 if (!dev)
627 return;
628
629 if (event_enum == EVENT_REBUILD) {
630 snprintf(data.event_name, sizeof(data.event_name), "%s%02d",
631 map_num_s(events_map, EVENT_REBUILD), progress);
632 } else {
633 snprintf(data.event_name, sizeof(data.event_name), "%s", map_num_s(events_map, event_enum));
634 }
635
636 data.event_enum = event_enum;
637
638 if (sprint_event_message(data.message, &data) != 0) {
639 pr_err("Cannot create event message.\n");
640 return;
641 }
642 pr_err("%s\n", data.message);
643
644 if (info.alert_cmd)
645 execute_alert_cmd(&data);
646
647 if (info.mailaddr && is_email_event(event_enum))
648 send_event_email(&data);
649
650 if (info.dosyslog)
651 log_event_to_syslog(&data);
652 }
653
654 static int check_array(struct state *st, struct mdstat_ent *mdstat,
655 int increments, char *prefer)
656 {
657 /* Update the state 'st' to reflect any changes shown in mdstat,
658 * or found by directly examining the array, and return
659 * '1' if the array is degraded, or '0' if it is optimal (or dead).
660 */
661 struct { int state, major, minor; } disks_info[MAX_DISKS];
662 struct mdinfo *sra = NULL;
663 mdu_array_info_t array;
664 struct mdstat_ent *mse = NULL, *mse2;
665 char *dev = st->devname;
666 int fd;
667 int i;
668 int remaining_disks;
669 int last_disk;
670 int new_array = 0;
671 int retval;
672 int is_container = 0;
673 unsigned long redundancy_only_flags = 0;
674
675 if (info.test)
676 alert(EVENT_TEST_MESSAGE, NULL, 0, dev, NULL);
677
678 retval = 0;
679
680 fd = open(dev, O_RDONLY);
681 if (fd < 0)
682 goto disappeared;
683
684 if (st->devnm[0] == 0)
685 snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
686
687 for (mse2 = mdstat; mse2; mse2 = mse2->next)
688 if (strcmp(mse2->devnm, st->devnm) == 0) {
689 mse2->devnm[0] = 0; /* flag it as "used" */
690 mse = mse2;
691 }
692
693 if (!mse) {
694 /* duplicated array in statelist
695 * or re-created after reading mdstat
696 */
697 st->err++;
698 goto out;
699 }
700
701 if (mse->level == NULL)
702 is_container = 1;
703
704 if (!is_container && !md_array_active(fd))
705 goto disappeared;
706
707 fcntl(fd, F_SETFD, FD_CLOEXEC);
708 if (md_get_array_info(fd, &array) < 0)
709 goto disappeared;
710
711 if (!is_container && map_name(pers, mse->level) > 0)
712 redundancy_only_flags |= GET_MISMATCH;
713
714 sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
715 GET_STATE | redundancy_only_flags);
716
717 if (!sra)
718 goto disappeared;
719
720 /* It's much easier to list what array levels can't
721 * have a device disappear than all of them that can
722 */
723 if (sra->array.level == 0 || sra->array.level == -1) {
724 if (!st->err && !st->from_config)
725 alert(EVENT_DEVICE_DISAPPEARED, "Wrong-Level", 0, dev, NULL);
726 st->err++;
727 goto out;
728 }
729
730 /* this array is in /proc/mdstat */
731 if (array.utime == 0)
732 /* external arrays don't update utime, so
733 * just make sure it is always different. */
734 array.utime = st->utime + 1;;
735
736 if (st->err) {
737 /* New array appeared where previously had an error */
738 st->err = 0;
739 st->percent = RESYNC_NONE;
740 new_array = 1;
741 if (!is_container)
742 alert(EVENT_NEW_ARRAY, NULL, 0, st->devname, NULL);
743 }
744
745 if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
746 st->working == sra->array.working_disks &&
747 st->spare == sra->array.spare_disks &&
748 (mse == NULL || (mse->percent == st->percent))) {
749 if ((st->active < st->raid) && st->spare == 0)
750 retval = 1;
751 goto out;
752 }
753 if (st->utime == 0 && /* new array */
754 mse->pattern && strchr(mse->pattern, '_') /* degraded */)
755 alert(EVENT_DEGRADED_ARRAY, NULL, 0, dev, NULL);
756
757 if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
758 sra->array.spare_disks < st->expected_spares)
759 alert(EVENT_SPARES_MISSING, NULL, 0, dev, NULL);
760 if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
761 mse->percent >= 0)
762 alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
763 if (st->percent >= 0 && mse->percent >= 0 &&
764 (mse->percent / increments) > (st->percent / increments)) {
765 if((mse->percent / increments) == 0)
766 alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
767 else
768 alert(EVENT_REBUILD, NULL, mse->percent, dev, NULL);
769 }
770
771 if (mse->percent == RESYNC_NONE && st->percent >= 0) {
772 /* Rebuild/sync/whatever just finished.
773 * If there is a number in /mismatch_cnt,
774 * we should report that.
775 */
776 if (sra && sra->mismatch_cnt > 0) {
777 char cnt[80];
778 snprintf(cnt, sizeof(cnt),
779 " mismatches found: %d (on raid level %d)",
780 sra->mismatch_cnt, sra->array.level);
781 alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, cnt);
782 } else
783 alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, NULL);
784 }
785 st->percent = mse->percent;
786
787 remaining_disks = sra->array.nr_disks;
788 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
789 mdu_disk_info_t disc;
790 disc.number = i;
791 if (md_get_disk_info(fd, &disc) >= 0) {
792 disks_info[i].state = disc.state;
793 disks_info[i].major = disc.major;
794 disks_info[i].minor = disc.minor;
795 if (disc.major || disc.minor)
796 remaining_disks --;
797 } else
798 disks_info[i].major = disks_info[i].minor = 0;
799 }
800 last_disk = i;
801
802 if (mse->metadata_version &&
803 strncmp(mse->metadata_version, "external:", 9) == 0 &&
804 is_subarray(mse->metadata_version+9)) {
805 char *sl;
806 snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
807 sl = strchr(st->parent_devnm, '/');
808 if (sl)
809 *sl = 0;
810 } else
811 st->parent_devnm[0] = 0;
812 if (st->metadata == NULL && st->parent_devnm[0] == 0)
813 st->metadata = super_by_fd(fd, NULL);
814
815 for (i = 0; i < MAX_DISKS; i++) {
816 mdu_disk_info_t disc = {0, 0, 0, 0, 0};
817 int newstate = 0;
818 int change;
819 char *dv = NULL;
820 disc.number = i;
821 if (i < last_disk && (disks_info[i].major || disks_info[i].minor)) {
822 newstate = disks_info[i].state;
823 dv = map_dev_preferred(disks_info[i].major, disks_info[i].minor, 1,
824 prefer);
825 disc.state = newstate;
826 disc.major = disks_info[i].major;
827 disc.minor = disks_info[i].minor;
828 } else
829 newstate = (1 << MD_DISK_REMOVED);
830
831 if (dv == NULL && st->devid[i])
832 dv = map_dev_preferred(major(st->devid[i]),
833 minor(st->devid[i]), 1, prefer);
834 change = newstate ^ st->devstate[i];
835 if (st->utime && change && !st->err && !new_array) {
836 if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
837 alert(EVENT_FAIL, NULL, 0, dev, dv);
838 else if ((newstate & (1 << MD_DISK_FAULTY)) &&
839 (disc.major || disc.minor) &&
840 st->devid[i] == makedev(disc.major,
841 disc.minor))
842 alert(EVENT_FAIL_SPARE, NULL, 0, dev, dv);
843 else if ((newstate&change) & (1 << MD_DISK_SYNC))
844 alert(EVENT_SPARE_ACTIVE, NULL, 0, dev, dv);
845 }
846 st->devstate[i] = newstate;
847 st->devid[i] = makedev(disc.major, disc.minor);
848 }
849 st->active = sra->array.active_disks;
850 st->working = sra->array.working_disks;
851 st->spare = sra->array.spare_disks;
852 st->failed = sra->array.failed_disks;
853 st->utime = array.utime;
854 st->raid = sra->array.raid_disks;
855 st->err = 0;
856 if ((st->active < st->raid) && st->spare == 0)
857 retval = 1;
858
859 out:
860 if (sra)
861 sysfs_free(sra);
862 if (fd >= 0)
863 close(fd);
864 return retval;
865
866 disappeared:
867 if (!st->err && !is_container)
868 alert(EVENT_DEVICE_DISAPPEARED, NULL, 0, dev, NULL);
869 st->err++;
870 goto out;
871 }
872
873 static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
874 {
875 struct mdstat_ent *mse;
876 int new_found = 0;
877 char *name;
878
879 for (mse = mdstat; mse; mse = mse->next)
880 if (mse->devnm[0] && (!mse->level || /* retrieve containers */
881 (strcmp(mse->level, "raid0") != 0 &&
882 strcmp(mse->level, "linear") != 0))) {
883 struct state *st = xcalloc(1, sizeof *st);
884 mdu_array_info_t array;
885 int fd;
886
887 name = get_md_name(mse->devnm);
888 if (!name) {
889 free(st);
890 continue;
891 }
892
893 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", name);
894 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
895 md_get_array_info(fd, &array) < 0) {
896 /* no such array */
897 if (fd >= 0)
898 close(fd);
899 put_md_name(st->devname);
900 if (st->metadata) {
901 st->metadata->ss->free_super(st->metadata);
902 free(st->metadata);
903 }
904 free(st);
905 continue;
906 }
907 close(fd);
908 st->next = *statelist;
909 st->err = 1;
910 st->from_auto = 1;
911 snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
912 st->percent = RESYNC_UNKNOWN;
913 st->expected_spares = -1;
914 if (mse->metadata_version &&
915 strncmp(mse->metadata_version,
916 "external:", 9) == 0 &&
917 is_subarray(mse->metadata_version+9)) {
918 char *sl;
919 snprintf(st->parent_devnm, MD_NAME_MAX,
920 "%s", mse->metadata_version + 10);
921 sl = strchr(st->parent_devnm, '/');
922 *sl = 0;
923 } else
924 st->parent_devnm[0] = 0;
925 *statelist = st;
926 if (info.test)
927 alert(EVENT_TEST_MESSAGE, NULL, 0, st->devname, NULL);
928 new_found = 1;
929 }
930 return new_found;
931 }
932
933 static int get_required_spare_criteria(struct state *st,
934 struct spare_criteria *sc)
935 {
936 int fd;
937
938 if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
939 sc->min_size = 0;
940 sc->sector_size = 0;
941 return 0;
942 }
943
944 fd = open(st->devname, O_RDONLY);
945 if (fd < 0)
946 return 1;
947 if (st->metadata->ss->external)
948 st->metadata->ss->load_container(st->metadata, fd, st->devname);
949 else
950 st->metadata->ss->load_super(st->metadata, fd, st->devname);
951 close(fd);
952 if (!st->metadata->sb)
953 return 1;
954
955 st->metadata->ss->get_spare_criteria(st->metadata, sc);
956 st->metadata->ss->free_super(st->metadata);
957
958 return 0;
959 }
960
961 static int check_donor(struct state *from, struct state *to)
962 {
963 struct state *sub;
964
965 if (from == to)
966 return 0;
967 if (from->parent)
968 /* Cannot move from a member */
969 return 0;
970 if (from->err)
971 return 0;
972 for (sub = from->subarray; sub; sub = sub->subarray)
973 /* If source array has degraded subarrays, don't
974 * remove anything
975 */
976 if (sub->active < sub->raid)
977 return 0;
978 if (from->metadata->ss->external == 0)
979 if (from->active < from->raid)
980 return 0;
981 if (from->spare <= 0)
982 return 0;
983 return 1;
984 }
985
986 static dev_t choose_spare(struct state *from, struct state *to,
987 struct domainlist *domlist, struct spare_criteria *sc)
988 {
989 int d;
990 dev_t dev = 0;
991
992 for (d = from->raid; !dev && d < MAX_DISKS; d++) {
993 if (from->devid[d] > 0 && from->devstate[d] == 0) {
994 struct dev_policy *pol;
995 unsigned long long dev_size;
996 unsigned int dev_sector_size;
997
998 if (to->metadata->ss->external &&
999 test_partition_from_id(from->devid[d]))
1000 continue;
1001
1002 if (sc->min_size &&
1003 dev_size_from_id(from->devid[d], &dev_size) &&
1004 dev_size < sc->min_size)
1005 continue;
1006
1007 if (sc->sector_size &&
1008 dev_sector_size_from_id(from->devid[d],
1009 &dev_sector_size) &&
1010 sc->sector_size != dev_sector_size)
1011 continue;
1012
1013 pol = devid_policy(from->devid[d]);
1014 if (from->spare_group)
1015 pol_add(&pol, pol_domain,
1016 from->spare_group, NULL);
1017 if (domain_test(domlist, pol,
1018 to->metadata->ss->name) == 1)
1019 dev = from->devid[d];
1020 dev_policy_free(pol);
1021 }
1022 }
1023 return dev;
1024 }
1025
1026 static dev_t container_choose_spare(struct state *from, struct state *to,
1027 struct domainlist *domlist,
1028 struct spare_criteria *sc, int active)
1029 {
1030 /* This is similar to choose_spare, but we cannot trust devstate,
1031 * so we need to read the metadata instead
1032 */
1033 struct mdinfo *list;
1034 struct supertype *st = from->metadata;
1035 int fd = open(from->devname, O_RDONLY);
1036 int err;
1037 dev_t dev = 0;
1038
1039 if (fd < 0)
1040 return 0;
1041 if (!st->ss->getinfo_super_disks) {
1042 close(fd);
1043 return 0;
1044 }
1045
1046 err = st->ss->load_container(st, fd, NULL);
1047 close(fd);
1048 if (err)
1049 return 0;
1050
1051 if (from == to) {
1052 /* We must check if number of active disks has not increased
1053 * since ioctl in main loop. mdmon may have added spare
1054 * to subarray. If so we do not need to look for more spares
1055 * so return non zero value */
1056 int active_cnt = 0;
1057 struct mdinfo *dp;
1058 list = st->ss->getinfo_super_disks(st);
1059 if (!list) {
1060 st->ss->free_super(st);
1061 return 1;
1062 }
1063 dp = list->devs;
1064 while (dp) {
1065 if (dp->disk.state & (1 << MD_DISK_SYNC) &&
1066 !(dp->disk.state & (1 << MD_DISK_FAULTY)))
1067 active_cnt++;
1068 dp = dp->next;
1069 }
1070 sysfs_free(list);
1071 if (active < active_cnt) {
1072 /* Spare just activated.*/
1073 st->ss->free_super(st);
1074 return 1;
1075 }
1076 }
1077
1078 /* We only need one spare so full list not needed */
1079 list = container_choose_spares(st, sc, domlist, from->spare_group,
1080 to->metadata->ss->name, 1);
1081 if (list) {
1082 struct mdinfo *disks = list->devs;
1083 if (disks)
1084 dev = makedev(disks->disk.major, disks->disk.minor);
1085 sysfs_free(list);
1086 }
1087 st->ss->free_super(st);
1088 return dev;
1089 }
1090
1091 static void try_spare_migration(struct state *statelist)
1092 {
1093 struct state *from;
1094 struct state *st;
1095 struct spare_criteria sc;
1096
1097 link_containers_with_subarrays(statelist);
1098 for (st = statelist; st; st = st->next)
1099 if (st->active < st->raid && st->spare == 0 && !st->err) {
1100 struct domainlist *domlist = NULL;
1101 int d;
1102 struct state *to = st;
1103
1104 if (to->parent_devnm[0] && !to->parent)
1105 /* subarray monitored without parent container
1106 * we can't move spares here */
1107 continue;
1108
1109 if (to->parent)
1110 /* member of a container */
1111 to = to->parent;
1112
1113 if (get_required_spare_criteria(to, &sc))
1114 continue;
1115 if (to->metadata->ss->external) {
1116 /* We must make sure there is
1117 * no suitable spare in container already.
1118 * If there is we don't add more */
1119 dev_t devid = container_choose_spare(
1120 to, to, NULL, &sc, st->active);
1121 if (devid > 0)
1122 continue;
1123 }
1124 for (d = 0; d < MAX_DISKS; d++)
1125 if (to->devid[d])
1126 domainlist_add_dev(&domlist,
1127 to->devid[d],
1128 to->metadata->ss->name);
1129 if (to->spare_group)
1130 domain_add(&domlist, to->spare_group);
1131 /*
1132 * No spare migration if the destination
1133 * has no domain. Skip this array.
1134 */
1135 if (!domlist)
1136 continue;
1137 for (from=statelist ; from ; from=from->next) {
1138 dev_t devid;
1139 if (!check_donor(from, to))
1140 continue;
1141 if (from->metadata->ss->external)
1142 devid = container_choose_spare(
1143 from, to, domlist, &sc, 0);
1144 else
1145 devid = choose_spare(from, to, domlist,
1146 &sc);
1147 if (devid > 0 &&
1148 move_spare(from->devname, to->devname,
1149 devid)) {
1150 alert(EVENT_MOVE_SPARE, NULL, 0, to->devname, from->devname);
1151 break;
1152 }
1153 }
1154 domain_free(domlist);
1155 }
1156 }
1157
1158 /* search the statelist to connect external
1159 * metadata subarrays with their containers
1160 * We always completely rebuild the tree from scratch as
1161 * that is safest considering the possibility of entries
1162 * disappearing or changing.
1163 */
1164 static void link_containers_with_subarrays(struct state *list)
1165 {
1166 struct state *st;
1167 struct state *cont;
1168 for (st = list; st; st = st->next) {
1169 st->parent = NULL;
1170 st->subarray = NULL;
1171 }
1172 for (st = list; st; st = st->next)
1173 if (st->parent_devnm[0])
1174 for (cont = list; cont; cont = cont->next)
1175 if (!cont->err && cont->parent_devnm[0] == 0 &&
1176 strcmp(cont->devnm, st->parent_devnm) == 0) {
1177 st->parent = cont;
1178 st->subarray = cont->subarray;
1179 cont->subarray = st;
1180 break;
1181 }
1182 }
1183
1184 /**
1185 * free_statelist() - Frees statelist.
1186 * @statelist: statelist to free
1187 */
1188 static void free_statelist(struct state *statelist)
1189 {
1190 struct state *tmp = NULL;
1191
1192 while (statelist) {
1193 if (statelist->spare_group)
1194 free(statelist->spare_group);
1195
1196 tmp = statelist;
1197 statelist = statelist->next;
1198 free(tmp);
1199 }
1200 }
1201
#ifndef NO_LIBUDEV
/**
 * check_udev_activity() - wait for udev to finish events processing.
 *
 * Polls the udev queue once per second until it reports empty or the
 * retry budget is used up. Nothing is waited for when udev is not in use.
 *
 * Return:
 * 0 - successful completion,
 * 1 - detected error while opening udev,
 * 2 - timeout.
 */
static int check_udev_activity(void)
{
	struct udev_queue *queue = NULL;
	struct udev *ctx = NULL;
	int retries_left = 30;
	int status = 0;

	/*
	 * In rare cases systemd may not have udev,
	 * in such cases just exit with status 0
	 */
	if (!use_udev())
		return 0;

	ctx = udev_new();
	if (!ctx)
		return 1;

	queue = udev_queue_new(ctx);
	if (!queue) {
		status = 1;
		goto release;
	}

	/* Fast path: nothing pending */
	if (udev_queue_get_queue_is_empty(queue))
		goto release;

	while (!udev_queue_get_queue_is_empty(queue)) {
		sleep(1);

		if (retries_left == 0) {
			status = 2;
			break;
		}
		retries_left--;
	}

release:
	if (queue)
		udev_queue_unref(queue);
	udev_unref(ctx);
	return status;
}
#endif
1259
1260 /* Not really Monitor but ... */
1261 int Wait(char *dev)
1262 {
1263 char devnm[32];
1264 dev_t rdev;
1265 char *tmp;
1266 int rv = 1;
1267 int frozen_remaining = 3;
1268
1269 if (!stat_is_blkdev(dev, &rdev))
1270 return 2;
1271
1272 tmp = devid2devnm(rdev);
1273 if (!tmp) {
1274 pr_err("Cannot get md device name.\n");
1275 return 2;
1276 }
1277
1278 strcpy(devnm, tmp);
1279
1280 while(1) {
1281 struct mdstat_ent *ms = mdstat_read(1, 0);
1282 struct mdstat_ent *e;
1283
1284 for (e = ms; e; e = e->next)
1285 if (strcmp(e->devnm, devnm) == 0)
1286 break;
1287
1288 if (e && e->percent == RESYNC_NONE) {
1289 /* We could be in the brief pause before something
1290 * starts. /proc/mdstat doesn't show that, but
1291 * sync_action does.
1292 */
1293 struct mdinfo mdi;
1294 char buf[21];
1295
1296 if (sysfs_init(&mdi, -1, devnm))
1297 return 2;
1298 if (sysfs_get_str(&mdi, NULL, "sync_action",
1299 buf, 20) > 0 &&
1300 strcmp(buf,"idle\n") != 0) {
1301 e->percent = RESYNC_UNKNOWN;
1302 if (strcmp(buf, "frozen\n") == 0) {
1303 if (frozen_remaining == 0)
1304 e->percent = RESYNC_NONE;
1305 else
1306 frozen_remaining -= 1;
1307 }
1308 }
1309 }
1310 if (!e || e->percent == RESYNC_NONE) {
1311 if (e && e->metadata_version &&
1312 strncmp(e->metadata_version, "external:", 9) == 0) {
1313 if (is_subarray(&e->metadata_version[9]))
1314 ping_monitor(&e->metadata_version[9]);
1315 else
1316 ping_monitor(devnm);
1317 }
1318 free_mdstat(ms);
1319 return rv;
1320 }
1321 free_mdstat(ms);
1322 rv = 0;
1323 mdstat_wait(5);
1324 }
1325 }
1326
/*
 * NULL-terminated list of state names that are treated as clean.
 * "broken" is used only for RAID0/LINEAR - it's the same as "clean",
 * but used in case the array has one or more members missing.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
1332
1333 int WaitClean(char *dev, int verbose)
1334 {
1335 int fd;
1336 struct mdinfo *mdi;
1337 int rv = 1;
1338 char devnm[32];
1339
1340 if (!stat_is_blkdev(dev, NULL))
1341 return 2;
1342 fd = open(dev, O_RDONLY);
1343 if (fd < 0) {
1344 if (verbose)
1345 pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
1346 return 1;
1347 }
1348
1349 strcpy(devnm, fd2devnm(fd));
1350 mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
1351 if (!mdi) {
1352 if (verbose)
1353 pr_err("Failed to read sysfs attributes for %s\n", dev);
1354 close(fd);
1355 return 0;
1356 }
1357
1358 switch(mdi->array.level) {
1359 case LEVEL_LINEAR:
1360 case LEVEL_MULTIPATH:
1361 case 0:
1362 /* safemode delay is irrelevant for these levels */
1363 rv = 0;
1364 }
1365
1366 /* for internal metadata the kernel handles the final clean
1367 * transition, containers can never be dirty
1368 */
1369 if (!is_subarray(mdi->text_version))
1370 rv = 0;
1371
1372 /* safemode disabled ? */
1373 if (mdi->safe_mode_delay == 0)
1374 rv = 0;
1375
1376 if (rv) {
1377 int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
1378 char buf[20];
1379 int delay = 5000;
1380
1381 /* minimize the safe_mode_delay and prepare to wait up to 5s
1382 * for writes to quiesce
1383 */
1384 sysfs_set_safemode(mdi, 1);
1385
1386 /* wait for array_state to be clean */
1387 while (1) {
1388 rv = read(state_fd, buf, sizeof(buf));
1389 if (rv < 0)
1390 break;
1391 if (sysfs_match_word(buf, clean_states) <
1392 (int)ARRAY_SIZE(clean_states) - 1)
1393 break;
1394 rv = sysfs_wait(state_fd, &delay);
1395 if (rv < 0 && errno != EINTR)
1396 break;
1397 lseek(state_fd, 0, SEEK_SET);
1398 }
1399 if (rv < 0)
1400 rv = 1;
1401 else if (ping_monitor(mdi->text_version) == 0) {
1402 /* we need to ping to close the window between array
1403 * state transitioning to clean and the metadata being
1404 * marked clean
1405 */
1406 rv = 0;
1407 } else {
1408 rv = 1;
1409 pr_err("Error connecting monitor with %s\n", dev);
1410 }
1411 if (rv && verbose)
1412 pr_err("Error waiting for %s to be clean\n", dev);
1413
1414 /* restore the original safe_mode_delay */
1415 sysfs_set_safemode(mdi, mdi->safe_mode_delay);
1416 close(state_fd);
1417 }
1418
1419 sysfs_free(mdi);
1420 close(fd);
1421
1422 return rv;
1423 }