]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
mdadm: Respect config file location in man
[thirdparty/mdadm.git] / Monitor.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_p.h"
27 #include "md_u.h"
28 #include <sys/wait.h>
29 #include <limits.h>
30 #include <syslog.h>
31 #ifndef NO_LIBUDEV
32 #include <libudev.h>
33 #endif
34
35 struct state {
36 char *devname;
37 char devnm[32]; /* to sync with mdstat info */
38 unsigned int utime;
39 int err;
40 char *spare_group;
41 int active, working, failed, spare, raid;
42 int from_config;
43 int from_auto;
44 int expected_spares;
45 int devstate[MAX_DISKS];
46 dev_t devid[MAX_DISKS];
47 int percent;
48 char parent_devnm[32]; /* For subarray, devnm of parent.
49 * For others, ""
50 */
51 struct supertype *metadata;
52 struct state *subarray;/* for a container it is a link to first subarray
53 * for a subarray it is a link to next subarray
54 * in the same container */
55 struct state *parent; /* for a subarray it is a link to its container
56 */
57 struct state *next;
58 };
59
/* Where and how alert() reports an event. */
struct alert_info {
	char *mailaddr;		/* recipient of mail alerts, or NULL */
	char *mailfrom;		/* "From:" header value, or NULL for the default */
	char *alert_cmd;	/* program run for each event, or NULL */
	int dosyslog;		/* non-zero: also report through syslog */
};
/* Forward declarations of the file-local helpers defined further down. */
static int make_daemon(char *pidfile);
static int check_one_sharer(int scan);
static void write_autorebuild_pid(void);
static void alert(char *event, char *dev, char *disc,
		  struct alert_info *info);
static int check_array(struct state *st,
		       struct mdstat_ent *mdstat,
		       int test,
		       struct alert_info *info,
		       int increments,
		       char *prefer);
static int add_new_arrays(struct mdstat_ent *mdstat,
			  struct state **statelist,
			  int test,
			  struct alert_info *info);
static void try_spare_migration(struct state *statelist,
				struct alert_info *info);
static void link_containers_with_subarrays(struct state *list);
#ifndef NO_LIBUDEV
static int check_udev_activity(void);
#endif
80
81 int Monitor(struct mddev_dev *devlist,
82 char *mailaddr, char *alert_cmd,
83 struct context *c,
84 int daemonise, int oneshot,
85 int dosyslog, char *pidfile, int increments,
86 int share)
87 {
88 /*
89 * Every few seconds, scan every md device looking for changes
90 * When a change is found, log it, possibly run the alert command,
91 * and possibly send Email
92 *
93 * For each array, we record:
94 * Update time
95 * active/working/failed/spare drives
96 * State of each device.
97 * %rebuilt if rebuilding
98 *
99 * If the update time changes, check out all the data again
100 * It is possible that we cannot get the state of each device
101 * due to bugs in the md kernel module.
102 * We also read /proc/mdstat to get rebuild percent,
103 * and to get state on all active devices incase of kernel bug.
104 *
105 * Events are:
106 * Fail
107 * An active device had Faulty set or Active/Sync removed
108 * FailSpare
109 * A spare device had Faulty set
110 * SpareActive
111 * An active device had a reverse transition
112 * RebuildStarted
113 * percent went from -1 to +ve
114 * RebuildNN
115 * percent went from below to not-below NN%
116 * DeviceDisappeared
117 * Couldn't access a device which was previously visible
118 *
119 * if we detect an array with active<raid and spare==0
120 * we look at other arrays that have same spare-group
121 * If we find one with active==raid and spare>0,
122 * and if we can get_disk_info and find a name
123 * Then we hot-remove and hot-add to the other array
124 *
125 * If devlist is NULL, then we can monitor everything because --scan
126 * was given. We get an initial list from config file and add anything
127 * that appears in /proc/mdstat
128 */
129
130 struct state *statelist = NULL;
131 struct state *st2;
132 int finished = 0;
133 struct mdstat_ent *mdstat = NULL;
134 char *mailfrom;
135 struct alert_info info;
136 struct mddev_ident *mdlist;
137 int delay_for_event = c->delay;
138
139 if (!mailaddr) {
140 mailaddr = conf_get_mailaddr();
141 if (mailaddr && ! c->scan)
142 pr_err("Monitor using email address \"%s\" from config file\n",
143 mailaddr);
144 }
145 mailfrom = conf_get_mailfrom();
146
147 if (!alert_cmd) {
148 alert_cmd = conf_get_program();
149 if (alert_cmd && !c->scan)
150 pr_err("Monitor using program \"%s\" from config file\n",
151 alert_cmd);
152 }
153 if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
154 pr_err("No mail address or alert command - not monitoring.\n");
155 return 1;
156 }
157 info.alert_cmd = alert_cmd;
158 info.mailaddr = mailaddr;
159 info.mailfrom = mailfrom;
160 info.dosyslog = dosyslog;
161
162 if (share){
163 if (check_one_sharer(c->scan))
164 return 1;
165 }
166
167 if (daemonise) {
168 int rv = make_daemon(pidfile);
169 if (rv >= 0)
170 return rv;
171 }
172
173 if (share)
174 write_autorebuild_pid();
175
176 if (devlist == NULL) {
177 mdlist = conf_get_ident(NULL);
178 for (; mdlist; mdlist = mdlist->next) {
179 struct state *st;
180
181 if (mdlist->devname == NULL)
182 continue;
183 if (strcasecmp(mdlist->devname, "<ignore>") == 0)
184 continue;
185 st = xcalloc(1, sizeof *st);
186 if (mdlist->devname[0] == '/')
187 st->devname = xstrdup(mdlist->devname);
188 else {
189 st->devname = xmalloc(8+strlen(mdlist->devname)+1);
190 strcpy(strcpy(st->devname, "/dev/md/"),
191 mdlist->devname);
192 }
193 st->next = statelist;
194 st->devnm[0] = 0;
195 st->percent = RESYNC_UNKNOWN;
196 st->from_config = 1;
197 st->expected_spares = mdlist->spare_disks;
198 if (mdlist->spare_group)
199 st->spare_group = xstrdup(mdlist->spare_group);
200 statelist = st;
201 }
202 } else {
203 struct mddev_dev *dv;
204
205 for (dv = devlist; dv; dv = dv->next) {
206 struct state *st = xcalloc(1, sizeof *st);
207 mdlist = conf_get_ident(dv->devname);
208 st->devname = xstrdup(dv->devname);
209 st->next = statelist;
210 st->devnm[0] = 0;
211 st->percent = RESYNC_UNKNOWN;
212 st->expected_spares = -1;
213 if (mdlist) {
214 st->expected_spares = mdlist->spare_disks;
215 if (mdlist->spare_group)
216 st->spare_group = xstrdup(mdlist->spare_group);
217 }
218 statelist = st;
219 }
220 }
221
222 while (!finished) {
223 int new_found = 0;
224 struct state *st, **stp;
225 int anydegraded = 0;
226 int anyredundant = 0;
227
228 if (mdstat)
229 free_mdstat(mdstat);
230 mdstat = mdstat_read(oneshot ? 0 : 1, 0);
231
232 for (st = statelist; st; st = st->next) {
233 if (check_array(st, mdstat, c->test, &info,
234 increments, c->prefer))
235 anydegraded = 1;
236 /* for external arrays, metadata is filled for
237 * containers only
238 */
239 if (st->metadata && st->metadata->ss->external)
240 continue;
241 if (st->err == 0 && !anyredundant)
242 anyredundant = 1;
243 }
244
245 /* now check if there are any new devices found in mdstat */
246 if (c->scan)
247 new_found = add_new_arrays(mdstat, &statelist, c->test,
248 &info);
249
250 /* If an array has active < raid && spare == 0 && spare_group != NULL
251 * Look for another array with spare > 0 and active == raid and same spare_group
252 * if found, choose a device and hotremove/hotadd
253 */
254 if (share && anydegraded)
255 try_spare_migration(statelist, &info);
256 if (!new_found) {
257 if (oneshot)
258 break;
259 else if (!anyredundant) {
260 pr_err("No array with redundancy detected, stopping\n");
261 break;
262 }
263 else {
264 #ifndef NO_LIBUDEV
265 /*
266 * Wait for udevd to finish new devices
267 * processing.
268 */
269 if (mdstat_wait(delay_for_event) &&
270 check_udev_activity())
271 pr_err("Error while waiting for UDEV to complete new devices processing\n");
272 #else
273 int wait_result = mdstat_wait(delay_for_event);
274 /*
275 * Give chance to process new device
276 */
277 if (wait_result != 0) {
278 if (c->delay > 5)
279 delay_for_event = 5;
280 } else
281 delay_for_event = c->delay;
282 #endif
283 mdstat_close();
284 }
285 }
286 c->test = 0;
287
288 for (stp = &statelist; (st = *stp) != NULL; ) {
289 if (st->from_auto && st->err > 5) {
290 *stp = st->next;
291 free(st->devname);
292 free(st->spare_group);
293 free(st);
294 } else
295 stp = &st->next;
296 }
297 }
298 for (st2 = statelist; st2; st2 = statelist) {
299 statelist = st2->next;
300 free(st2);
301 }
302
303 if (pidfile)
304 unlink(pidfile);
305 return 0;
306 }
307
/*
 * Fork into the background.
 *
 * pidfile: file to receive the daemon's pid, or NULL to print the pid
 *          on stdout instead.
 *
 * Return:
 *   -1 in the forked daemon
 *    0 in the parent
 *    1 on error
 * so a non-negative value becomes the exit code.
 */
static int make_daemon(char *pidfile)
{
	int pid = fork();

	if (pid < 0) {
		perror("daemonise");
		return 1;
	}

	if (pid == 0) {
		/* child: become the daemon */
		manage_fork_fds(0);
		setsid();
		return -1;
	}

	/* parent: publish the child's pid */
	if (!pidfile) {
		printf("%d\n", pid);
	} else {
		FILE *pid_file = NULL;
		int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);

		if (fd >= 0)
			pid_file = fdopen(fd, "w");
		if (!pid_file) {
			perror("cannot create pid file");
			/* fdopen() failed, so the descriptor is still ours */
			if (fd >= 0)
				close(fd);
		} else {
			fprintf(pid_file, "%d\n", pid);
			fclose(pid_file);
		}
	}
	return 0;
}
343
344 static int check_one_sharer(int scan)
345 {
346 int pid;
347 FILE *comm_fp;
348 FILE *fp;
349 char comm_path[PATH_MAX];
350 char path[PATH_MAX];
351 char comm[20];
352
353 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
354 fp = fopen(path, "r");
355 if (fp) {
356 if (fscanf(fp, "%d", &pid) != 1)
357 pid = -1;
358 snprintf(comm_path, sizeof(comm_path),
359 "/proc/%d/comm", pid);
360 comm_fp = fopen(comm_path, "r");
361 if (comm_fp) {
362 if (fscanf(comm_fp, "%19s", comm) &&
363 strncmp(basename(comm), Name, strlen(Name)) == 0) {
364 if (scan) {
365 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
366 fclose(comm_fp);
367 fclose(fp);
368 return 1;
369 } else {
370 pr_err("Warning: One autorebuild process already running.\n");
371 }
372 }
373 fclose(comm_fp);
374 }
375 fclose(fp);
376 }
377 return 0;
378 }
379
380 static void write_autorebuild_pid()
381 {
382 char path[PATH_MAX];
383 int pid;
384 FILE *fp = NULL;
385 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
386
387 if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
388 pr_err("Can't create autorebuild.pid file\n");
389 } else {
390 int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700);
391
392 if (fd >= 0)
393 fp = fdopen(fd, "w");
394
395 if (!fp)
396 pr_err("Can't create autorebuild.pid file\n");
397 else {
398 pid = getpid();
399 fprintf(fp, "%d\n", pid);
400 fclose(fp);
401 }
402 }
403 }
404
405 static void alert(char *event, char *dev, char *disc, struct alert_info *info)
406 {
407 int priority;
408
409 if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) {
410 time_t now = time(0);
411
412 printf("%1.15s: %s on %s %s\n", ctime(&now) + 4,
413 event, dev, disc?disc:"unknown device");
414 }
415 if (info->alert_cmd) {
416 int pid = fork();
417 switch(pid) {
418 default:
419 waitpid(pid, NULL, 0);
420 break;
421 case -1:
422 break;
423 case 0:
424 execl(info->alert_cmd, info->alert_cmd,
425 event, dev, disc, NULL);
426 exit(2);
427 }
428 }
429 if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 ||
430 strncmp(event, "Test", 4) == 0 ||
431 strncmp(event, "Spares", 6) == 0 ||
432 strncmp(event, "Degrade", 7) == 0)) {
433 FILE *mp = popen(Sendmail, "w");
434 if (mp) {
435 FILE *mdstat;
436 char hname[256];
437
438 gethostname(hname, sizeof(hname));
439 signal_s(SIGPIPE, SIG_IGN);
440
441 if (info->mailfrom)
442 fprintf(mp, "From: %s\n", info->mailfrom);
443 else
444 fprintf(mp, "From: %s monitoring <root>\n",
445 Name);
446 fprintf(mp, "To: %s\n", info->mailaddr);
447 fprintf(mp, "Subject: %s event on %s:%s\n\n",
448 event, dev, hname);
449
450 fprintf(mp,
451 "This is an automatically generated mail message from %s\n", Name);
452 fprintf(mp, "running on %s\n\n", hname);
453
454 fprintf(mp,
455 "A %s event had been detected on md device %s.\n\n", event, dev);
456
457 if (disc && disc[0] != ' ')
458 fprintf(mp,
459 "It could be related to component device %s.\n\n", disc);
460 if (disc && disc[0] == ' ')
461 fprintf(mp, "Extra information:%s.\n\n", disc);
462
463 fprintf(mp, "Faithfully yours, etc.\n");
464
465 mdstat = fopen("/proc/mdstat", "r");
466 if (mdstat) {
467 char buf[8192];
468 int n;
469 fprintf(mp,
470 "\nP.S. The /proc/mdstat file currently contains the following:\n\n");
471 while ((n = fread(buf, 1, sizeof(buf),
472 mdstat)) > 0)
473 n = fwrite(buf, 1, n, mp);
474 fclose(mdstat);
475 }
476 pclose(mp);
477 }
478 }
479
480 /* log the event to syslog maybe */
481 if (info->dosyslog) {
482 /* Log at a different severity depending on the event.
483 *
484 * These are the critical events: */
485 if (strncmp(event, "Fail", 4) == 0 ||
486 strncmp(event, "Degrade", 7) == 0 ||
487 strncmp(event, "DeviceDisappeared", 17) == 0)
488 priority = LOG_CRIT;
489 /* Good to know about, but are not failures: */
490 else if (strncmp(event, "Rebuild", 7) == 0 ||
491 strncmp(event, "MoveSpare", 9) == 0 ||
492 strncmp(event, "Spares", 6) != 0)
493 priority = LOG_WARNING;
494 /* Everything else: */
495 else
496 priority = LOG_INFO;
497
498 if (disc && disc[0] != ' ')
499 syslog(priority,
500 "%s event detected on md device %s, component device %s", event, dev, disc);
501 else if (disc)
502 syslog(priority,
503 "%s event detected on md device %s: %s",
504 event, dev, disc);
505 else
506 syslog(priority,
507 "%s event detected on md device %s",
508 event, dev);
509 }
510 }
511
512 static int check_array(struct state *st, struct mdstat_ent *mdstat,
513 int test, struct alert_info *ainfo,
514 int increments, char *prefer)
515 {
516 /* Update the state 'st' to reflect any changes shown in mdstat,
517 * or found by directly examining the array, and return
518 * '1' if the array is degraded, or '0' if it is optimal (or dead).
519 */
520 struct { int state, major, minor; } info[MAX_DISKS];
521 struct mdinfo *sra = NULL;
522 mdu_array_info_t array;
523 struct mdstat_ent *mse = NULL, *mse2;
524 char *dev = st->devname;
525 int fd;
526 int i;
527 int remaining_disks;
528 int last_disk;
529 int new_array = 0;
530 int retval;
531 int is_container = 0;
532 unsigned long redundancy_only_flags = 0;
533
534 if (test)
535 alert("TestMessage", dev, NULL, ainfo);
536
537 retval = 0;
538
539 fd = open(dev, O_RDONLY);
540 if (fd < 0)
541 goto disappeared;
542
543 if (st->devnm[0] == 0)
544 strcpy(st->devnm, fd2devnm(fd));
545
546 for (mse2 = mdstat; mse2; mse2 = mse2->next)
547 if (strcmp(mse2->devnm, st->devnm) == 0) {
548 mse2->devnm[0] = 0; /* flag it as "used" */
549 mse = mse2;
550 }
551
552 if (!mse) {
553 /* duplicated array in statelist
554 * or re-created after reading mdstat
555 */
556 st->err++;
557 goto out;
558 }
559
560 if (mse->level == NULL)
561 is_container = 1;
562
563 if (!is_container && !md_array_active(fd))
564 goto disappeared;
565
566 fcntl(fd, F_SETFD, FD_CLOEXEC);
567 if (md_get_array_info(fd, &array) < 0)
568 goto disappeared;
569
570 if (!is_container && map_name(pers, mse->level) > 0)
571 redundancy_only_flags |= GET_MISMATCH;
572
573 sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
574 GET_STATE | redundancy_only_flags);
575
576 if (!sra)
577 goto disappeared;
578
579 /* It's much easier to list what array levels can't
580 * have a device disappear than all of them that can
581 */
582 if (sra->array.level == 0 || sra->array.level == -1) {
583 if (!st->err && !st->from_config)
584 alert("DeviceDisappeared", dev, " Wrong-Level", ainfo);
585 st->err++;
586 goto out;
587 }
588
589 /* this array is in /proc/mdstat */
590 if (array.utime == 0)
591 /* external arrays don't update utime, so
592 * just make sure it is always different. */
593 array.utime = st->utime + 1;;
594
595 if (st->err) {
596 /* New array appeared where previously had an error */
597 st->err = 0;
598 st->percent = RESYNC_NONE;
599 new_array = 1;
600 if (!is_container)
601 alert("NewArray", st->devname, NULL, ainfo);
602 }
603
604 if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
605 st->working == sra->array.working_disks &&
606 st->spare == sra->array.spare_disks &&
607 (mse == NULL || (mse->percent == st->percent))) {
608 if ((st->active < st->raid) && st->spare == 0)
609 retval = 1;
610 goto out;
611 }
612 if (st->utime == 0 && /* new array */
613 mse->pattern && strchr(mse->pattern, '_') /* degraded */)
614 alert("DegradedArray", dev, NULL, ainfo);
615
616 if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
617 sra->array.spare_disks < st->expected_spares)
618 alert("SparesMissing", dev, NULL, ainfo);
619 if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
620 mse->percent >= 0)
621 alert("RebuildStarted", dev, NULL, ainfo);
622 if (st->percent >= 0 && mse->percent >= 0 &&
623 (mse->percent / increments) > (st->percent / increments)) {
624 char percentalert[18];
625 /*
626 * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
627 */
628
629 if((mse->percent / increments) == 0)
630 snprintf(percentalert, sizeof(percentalert),
631 "RebuildStarted");
632 else
633 snprintf(percentalert, sizeof(percentalert),
634 "Rebuild%02d", mse->percent);
635
636 alert(percentalert, dev, NULL, ainfo);
637 }
638
639 if (mse->percent == RESYNC_NONE && st->percent >= 0) {
640 /* Rebuild/sync/whatever just finished.
641 * If there is a number in /mismatch_cnt,
642 * we should report that.
643 */
644 if (sra && sra->mismatch_cnt > 0) {
645 char cnt[80];
646 snprintf(cnt, sizeof(cnt),
647 " mismatches found: %d (on raid level %d)",
648 sra->mismatch_cnt, sra->array.level);
649 alert("RebuildFinished", dev, cnt, ainfo);
650 } else
651 alert("RebuildFinished", dev, NULL, ainfo);
652 }
653 st->percent = mse->percent;
654
655 remaining_disks = sra->array.nr_disks;
656 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
657 mdu_disk_info_t disc;
658 disc.number = i;
659 if (md_get_disk_info(fd, &disc) >= 0) {
660 info[i].state = disc.state;
661 info[i].major = disc.major;
662 info[i].minor = disc.minor;
663 if (disc.major || disc.minor)
664 remaining_disks --;
665 } else
666 info[i].major = info[i].minor = 0;
667 }
668 last_disk = i;
669
670 if (mse->metadata_version &&
671 strncmp(mse->metadata_version, "external:", 9) == 0 &&
672 is_subarray(mse->metadata_version+9)) {
673 char *sl;
674 strcpy(st->parent_devnm, mse->metadata_version + 10);
675 sl = strchr(st->parent_devnm, '/');
676 if (sl)
677 *sl = 0;
678 } else
679 st->parent_devnm[0] = 0;
680 if (st->metadata == NULL && st->parent_devnm[0] == 0)
681 st->metadata = super_by_fd(fd, NULL);
682
683 for (i = 0; i < MAX_DISKS; i++) {
684 mdu_disk_info_t disc = {0, 0, 0, 0, 0};
685 int newstate = 0;
686 int change;
687 char *dv = NULL;
688 disc.number = i;
689 if (i < last_disk && (info[i].major || info[i].minor)) {
690 newstate = info[i].state;
691 dv = map_dev_preferred(info[i].major, info[i].minor, 1,
692 prefer);
693 disc.state = newstate;
694 disc.major = info[i].major;
695 disc.minor = info[i].minor;
696 } else
697 newstate = (1 << MD_DISK_REMOVED);
698
699 if (dv == NULL && st->devid[i])
700 dv = map_dev_preferred(major(st->devid[i]),
701 minor(st->devid[i]), 1, prefer);
702 change = newstate ^ st->devstate[i];
703 if (st->utime && change && !st->err && !new_array) {
704 if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
705 alert("Fail", dev, dv, ainfo);
706 else if ((newstate & (1 << MD_DISK_FAULTY)) &&
707 (disc.major || disc.minor) &&
708 st->devid[i] == makedev(disc.major,
709 disc.minor))
710 alert("FailSpare", dev, dv, ainfo);
711 else if ((newstate&change) & (1 << MD_DISK_SYNC))
712 alert("SpareActive", dev, dv, ainfo);
713 }
714 st->devstate[i] = newstate;
715 st->devid[i] = makedev(disc.major, disc.minor);
716 }
717 st->active = sra->array.active_disks;
718 st->working = sra->array.working_disks;
719 st->spare = sra->array.spare_disks;
720 st->failed = sra->array.failed_disks;
721 st->utime = array.utime;
722 st->raid = sra->array.raid_disks;
723 st->err = 0;
724 if ((st->active < st->raid) && st->spare == 0)
725 retval = 1;
726
727 out:
728 if (sra)
729 sysfs_free(sra);
730 if (fd >= 0)
731 close(fd);
732 return retval;
733
734 disappeared:
735 if (!st->err && !is_container)
736 alert("DeviceDisappeared", dev, NULL, ainfo);
737 st->err++;
738 goto out;
739 }
740
741 static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
742 int test, struct alert_info *info)
743 {
744 struct mdstat_ent *mse;
745 int new_found = 0;
746 char *name;
747
748 for (mse = mdstat; mse; mse = mse->next)
749 if (mse->devnm[0] && (!mse->level || /* retrieve containers */
750 (strcmp(mse->level, "raid0") != 0 &&
751 strcmp(mse->level, "linear") != 0))) {
752 struct state *st = xcalloc(1, sizeof *st);
753 mdu_array_info_t array;
754 int fd;
755
756 name = get_md_name(mse->devnm);
757 if (!name) {
758 free(st);
759 continue;
760 }
761
762 st->devname = xstrdup(name);
763 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
764 md_get_array_info(fd, &array) < 0) {
765 /* no such array */
766 if (fd >= 0)
767 close(fd);
768 put_md_name(st->devname);
769 free(st->devname);
770 if (st->metadata) {
771 st->metadata->ss->free_super(st->metadata);
772 free(st->metadata);
773 }
774 free(st);
775 continue;
776 }
777 close(fd);
778 st->next = *statelist;
779 st->err = 1;
780 st->from_auto = 1;
781 strcpy(st->devnm, mse->devnm);
782 st->percent = RESYNC_UNKNOWN;
783 st->expected_spares = -1;
784 if (mse->metadata_version &&
785 strncmp(mse->metadata_version,
786 "external:", 9) == 0 &&
787 is_subarray(mse->metadata_version+9)) {
788 char *sl;
789 strcpy(st->parent_devnm,
790 mse->metadata_version+10);
791 sl = strchr(st->parent_devnm, '/');
792 *sl = 0;
793 } else
794 st->parent_devnm[0] = 0;
795 *statelist = st;
796 if (test)
797 alert("TestMessage", st->devname, NULL, info);
798 new_found = 1;
799 }
800 return new_found;
801 }
802
803 static int get_required_spare_criteria(struct state *st,
804 struct spare_criteria *sc)
805 {
806 int fd;
807
808 if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
809 sc->min_size = 0;
810 sc->sector_size = 0;
811 return 0;
812 }
813
814 fd = open(st->devname, O_RDONLY);
815 if (fd < 0)
816 return 1;
817 if (st->metadata->ss->external)
818 st->metadata->ss->load_container(st->metadata, fd, st->devname);
819 else
820 st->metadata->ss->load_super(st->metadata, fd, st->devname);
821 close(fd);
822 if (!st->metadata->sb)
823 return 1;
824
825 st->metadata->ss->get_spare_criteria(st->metadata, sc);
826 st->metadata->ss->free_super(st->metadata);
827
828 return 0;
829 }
830
831 static int check_donor(struct state *from, struct state *to)
832 {
833 struct state *sub;
834
835 if (from == to)
836 return 0;
837 if (from->parent)
838 /* Cannot move from a member */
839 return 0;
840 if (from->err)
841 return 0;
842 for (sub = from->subarray; sub; sub = sub->subarray)
843 /* If source array has degraded subarrays, don't
844 * remove anything
845 */
846 if (sub->active < sub->raid)
847 return 0;
848 if (from->metadata->ss->external == 0)
849 if (from->active < from->raid)
850 return 0;
851 if (from->spare <= 0)
852 return 0;
853 return 1;
854 }
855
856 static dev_t choose_spare(struct state *from, struct state *to,
857 struct domainlist *domlist, struct spare_criteria *sc)
858 {
859 int d;
860 dev_t dev = 0;
861
862 for (d = from->raid; !dev && d < MAX_DISKS; d++) {
863 if (from->devid[d] > 0 && from->devstate[d] == 0) {
864 struct dev_policy *pol;
865 unsigned long long dev_size;
866 unsigned int dev_sector_size;
867
868 if (to->metadata->ss->external &&
869 test_partition_from_id(from->devid[d]))
870 continue;
871
872 if (sc->min_size &&
873 dev_size_from_id(from->devid[d], &dev_size) &&
874 dev_size < sc->min_size)
875 continue;
876
877 if (sc->sector_size &&
878 dev_sector_size_from_id(from->devid[d],
879 &dev_sector_size) &&
880 sc->sector_size != dev_sector_size)
881 continue;
882
883 pol = devid_policy(from->devid[d]);
884 if (from->spare_group)
885 pol_add(&pol, pol_domain,
886 from->spare_group, NULL);
887 if (domain_test(domlist, pol,
888 to->metadata->ss->name) == 1)
889 dev = from->devid[d];
890 dev_policy_free(pol);
891 }
892 }
893 return dev;
894 }
895
896 static dev_t container_choose_spare(struct state *from, struct state *to,
897 struct domainlist *domlist,
898 struct spare_criteria *sc, int active)
899 {
900 /* This is similar to choose_spare, but we cannot trust devstate,
901 * so we need to read the metadata instead
902 */
903 struct mdinfo *list;
904 struct supertype *st = from->metadata;
905 int fd = open(from->devname, O_RDONLY);
906 int err;
907 dev_t dev = 0;
908
909 if (fd < 0)
910 return 0;
911 if (!st->ss->getinfo_super_disks) {
912 close(fd);
913 return 0;
914 }
915
916 err = st->ss->load_container(st, fd, NULL);
917 close(fd);
918 if (err)
919 return 0;
920
921 if (from == to) {
922 /* We must check if number of active disks has not increased
923 * since ioctl in main loop. mdmon may have added spare
924 * to subarray. If so we do not need to look for more spares
925 * so return non zero value */
926 int active_cnt = 0;
927 struct mdinfo *dp;
928 list = st->ss->getinfo_super_disks(st);
929 if (!list) {
930 st->ss->free_super(st);
931 return 1;
932 }
933 dp = list->devs;
934 while (dp) {
935 if (dp->disk.state & (1 << MD_DISK_SYNC) &&
936 !(dp->disk.state & (1 << MD_DISK_FAULTY)))
937 active_cnt++;
938 dp = dp->next;
939 }
940 sysfs_free(list);
941 if (active < active_cnt) {
942 /* Spare just activated.*/
943 st->ss->free_super(st);
944 return 1;
945 }
946 }
947
948 /* We only need one spare so full list not needed */
949 list = container_choose_spares(st, sc, domlist, from->spare_group,
950 to->metadata->ss->name, 1);
951 if (list) {
952 struct mdinfo *disks = list->devs;
953 if (disks)
954 dev = makedev(disks->disk.major, disks->disk.minor);
955 sysfs_free(list);
956 }
957 st->ss->free_super(st);
958 return dev;
959 }
960
961 static void try_spare_migration(struct state *statelist, struct alert_info *info)
962 {
963 struct state *from;
964 struct state *st;
965 struct spare_criteria sc;
966
967 link_containers_with_subarrays(statelist);
968 for (st = statelist; st; st = st->next)
969 if (st->active < st->raid && st->spare == 0 && !st->err) {
970 struct domainlist *domlist = NULL;
971 int d;
972 struct state *to = st;
973
974 if (to->parent_devnm[0] && !to->parent)
975 /* subarray monitored without parent container
976 * we can't move spares here */
977 continue;
978
979 if (to->parent)
980 /* member of a container */
981 to = to->parent;
982
983 if (get_required_spare_criteria(to, &sc))
984 continue;
985 if (to->metadata->ss->external) {
986 /* We must make sure there is
987 * no suitable spare in container already.
988 * If there is we don't add more */
989 dev_t devid = container_choose_spare(
990 to, to, NULL, &sc, st->active);
991 if (devid > 0)
992 continue;
993 }
994 for (d = 0; d < MAX_DISKS; d++)
995 if (to->devid[d])
996 domainlist_add_dev(&domlist,
997 to->devid[d],
998 to->metadata->ss->name);
999 if (to->spare_group)
1000 domain_add(&domlist, to->spare_group);
1001 /*
1002 * No spare migration if the destination
1003 * has no domain. Skip this array.
1004 */
1005 if (!domlist)
1006 continue;
1007 for (from=statelist ; from ; from=from->next) {
1008 dev_t devid;
1009 if (!check_donor(from, to))
1010 continue;
1011 if (from->metadata->ss->external)
1012 devid = container_choose_spare(
1013 from, to, domlist, &sc, 0);
1014 else
1015 devid = choose_spare(from, to, domlist,
1016 &sc);
1017 if (devid > 0 &&
1018 move_spare(from->devname, to->devname,
1019 devid)) {
1020 alert("MoveSpare", to->devname,
1021 from->devname, info);
1022 break;
1023 }
1024 }
1025 domain_free(domlist);
1026 }
1027 }
1028
1029 /* search the statelist to connect external
1030 * metadata subarrays with their containers
1031 * We always completely rebuild the tree from scratch as
1032 * that is safest considering the possibility of entries
1033 * disappearing or changing.
1034 */
1035 static void link_containers_with_subarrays(struct state *list)
1036 {
1037 struct state *st;
1038 struct state *cont;
1039 for (st = list; st; st = st->next) {
1040 st->parent = NULL;
1041 st->subarray = NULL;
1042 }
1043 for (st = list; st; st = st->next)
1044 if (st->parent_devnm[0])
1045 for (cont = list; cont; cont = cont->next)
1046 if (!cont->err && cont->parent_devnm[0] == 0 &&
1047 strcmp(cont->devnm, st->parent_devnm) == 0) {
1048 st->parent = cont;
1049 st->subarray = cont->subarray;
1050 cont->subarray = st;
1051 break;
1052 }
1053 }
1054
#ifndef NO_LIBUDEV
/* function: check_udev_activity
 * Description: Wait, in one-second steps, for the udev event queue to
 *		become empty.
 * Returns:
 *		1 - detected error while opening udev
 *		2 - timeout
 *		0 - successful completion
 */
static int check_udev_activity(void)
{
	struct udev *ctx;
	struct udev_queue *queue;
	int remaining;
	int rc = 0;

	/*
	 * In rare cases the system may not have udevd running,
	 * in such cases just exit with rc 0
	 */
	if (!use_udev())
		return 0;

	ctx = udev_new();
	if (ctx == NULL)
		return 1;

	queue = udev_queue_new(ctx);
	if (queue == NULL) {
		udev_unref(ctx);
		return 1;
	}

	if (!udev_queue_get_queue_is_empty(queue)) {
		/* 30 decrements plus one final sleep before giving up */
		for (remaining = 30;
		     !udev_queue_get_queue_is_empty(queue);
		     remaining--) {
			sleep(1);

			if (remaining == 0) {
				rc = 2;
				break;
			}
		}
	}

	udev_queue_unref(queue);
	udev_unref(ctx);
	return rc;
}
#endif
1112
1113 /* Not really Monitor but ... */
1114 int Wait(char *dev)
1115 {
1116 char devnm[32];
1117 dev_t rdev;
1118 char *tmp;
1119 int rv = 1;
1120 int frozen_remaining = 3;
1121
1122 if (!stat_is_blkdev(dev, &rdev))
1123 return 2;
1124
1125 tmp = devid2devnm(rdev);
1126 if (!tmp) {
1127 pr_err("Cannot get md device name.\n");
1128 return 2;
1129 }
1130
1131 strcpy(devnm, tmp);
1132
1133 while(1) {
1134 struct mdstat_ent *ms = mdstat_read(1, 0);
1135 struct mdstat_ent *e;
1136
1137 for (e = ms; e; e = e->next)
1138 if (strcmp(e->devnm, devnm) == 0)
1139 break;
1140
1141 if (e && e->percent == RESYNC_NONE) {
1142 /* We could be in the brief pause before something
1143 * starts. /proc/mdstat doesn't show that, but
1144 * sync_action does.
1145 */
1146 struct mdinfo mdi;
1147 char buf[21];
1148
1149 if (sysfs_init(&mdi, -1, devnm))
1150 return 2;
1151 if (sysfs_get_str(&mdi, NULL, "sync_action",
1152 buf, 20) > 0 &&
1153 strcmp(buf,"idle\n") != 0) {
1154 e->percent = RESYNC_UNKNOWN;
1155 if (strcmp(buf, "frozen\n") == 0) {
1156 if (frozen_remaining == 0)
1157 e->percent = RESYNC_NONE;
1158 else
1159 frozen_remaining -= 1;
1160 }
1161 }
1162 }
1163 if (!e || e->percent == RESYNC_NONE) {
1164 if (e && e->metadata_version &&
1165 strncmp(e->metadata_version, "external:", 9) == 0) {
1166 if (is_subarray(&e->metadata_version[9]))
1167 ping_monitor(&e->metadata_version[9]);
1168 else
1169 ping_monitor(devnm);
1170 }
1171 free_mdstat(ms);
1172 return rv;
1173 }
1174 free_mdstat(ms);
1175 rv = 0;
1176 mdstat_wait(5);
1177 }
1178 }
1179
/*
 * array_state values that WaitClean() accepts as "clean"; the list is
 * NULL-terminated.
 * The state "broken" is used only for RAID0/LINEAR - it's the same as
 * "clean", but used in case the array has one or more members missing.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
1185
1186 int WaitClean(char *dev, int verbose)
1187 {
1188 int fd;
1189 struct mdinfo *mdi;
1190 int rv = 1;
1191 char devnm[32];
1192
1193 if (!stat_is_blkdev(dev, NULL))
1194 return 2;
1195 fd = open(dev, O_RDONLY);
1196 if (fd < 0) {
1197 if (verbose)
1198 pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
1199 return 1;
1200 }
1201
1202 strcpy(devnm, fd2devnm(fd));
1203 mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
1204 if (!mdi) {
1205 if (verbose)
1206 pr_err("Failed to read sysfs attributes for %s\n", dev);
1207 close(fd);
1208 return 0;
1209 }
1210
1211 switch(mdi->array.level) {
1212 case LEVEL_LINEAR:
1213 case LEVEL_MULTIPATH:
1214 case 0:
1215 /* safemode delay is irrelevant for these levels */
1216 rv = 0;
1217 }
1218
1219 /* for internal metadata the kernel handles the final clean
1220 * transition, containers can never be dirty
1221 */
1222 if (!is_subarray(mdi->text_version))
1223 rv = 0;
1224
1225 /* safemode disabled ? */
1226 if (mdi->safe_mode_delay == 0)
1227 rv = 0;
1228
1229 if (rv) {
1230 int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
1231 char buf[20];
1232 int delay = 5000;
1233
1234 /* minimize the safe_mode_delay and prepare to wait up to 5s
1235 * for writes to quiesce
1236 */
1237 sysfs_set_safemode(mdi, 1);
1238
1239 /* wait for array_state to be clean */
1240 while (1) {
1241 rv = read(state_fd, buf, sizeof(buf));
1242 if (rv < 0)
1243 break;
1244 if (sysfs_match_word(buf, clean_states) <
1245 (int)ARRAY_SIZE(clean_states) - 1)
1246 break;
1247 rv = sysfs_wait(state_fd, &delay);
1248 if (rv < 0 && errno != EINTR)
1249 break;
1250 lseek(state_fd, 0, SEEK_SET);
1251 }
1252 if (rv < 0)
1253 rv = 1;
1254 else if (ping_monitor(mdi->text_version) == 0) {
1255 /* we need to ping to close the window between array
1256 * state transitioning to clean and the metadata being
1257 * marked clean
1258 */
1259 rv = 0;
1260 } else {
1261 rv = 1;
1262 pr_err("Error connecting monitor with %s\n", dev);
1263 }
1264 if (rv && verbose)
1265 pr_err("Error waiting for %s to be clean\n", dev);
1266
1267 /* restore the original safe_mode_delay */
1268 sysfs_set_safemode(mdi, mdi->safe_mode_delay);
1269 close(state_fd);
1270 }
1271
1272 sysfs_free(mdi);
1273 close(fd);
1274
1275 return rv;
1276 }