]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Monitor.c
Mdmonitor: Make alert_info global
[thirdparty/mdadm.git] / Monitor.c
CommitLineData
52826846 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
52826846 3 *
e736b623 4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
52826846
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
52826846
NB
23 */
24
9a9dab36 25#include "mdadm.h"
52826846
NB
26#include "md_p.h"
27#include "md_u.h"
e0d19036 28#include <sys/wait.h>
4450e59f 29#include <limits.h>
773135f5 30#include <syslog.h>
af3396da 31#ifndef NO_LIBUDEV
49b69533 32#include <libudev.h>
af3396da 33#endif
52826846 34
2e0172b1 35struct state {
84d969be
KT
36 char devname[MD_NAME_MAX + sizeof("/dev/md/")]; /* length of "/dev/md/" + device name + terminating byte*/
37 char devnm[MD_NAME_MAX]; /* to sync with mdstat info */
1d13b599 38 unsigned int utime;
2e0172b1
N
39 int err;
40 char *spare_group;
41 int active, working, failed, spare, raid;
f1661bd7 42 int from_config;
73ff0732 43 int from_auto;
2e0172b1 44 int expected_spares;
b0599bda
N
45 int devstate[MAX_DISKS];
46 dev_t devid[MAX_DISKS];
2e0172b1 47 int percent;
84d969be
KT
48 char parent_devnm[MD_NAME_MAX]; /* For subarray, devnm of parent.
49 * For others, ""
50 */
2e0172b1 51 struct supertype *metadata;
c3621c0a
ML
52 struct state *subarray;/* for a container it is a link to first subarray
53 * for a subarray it is a link to next subarray
54 * in the same container */
55 struct state *parent; /* for a subarray it is a link to its container
56 */
2e0172b1
N
57 struct state *next;
58};
59
/*
 * Alert delivery settings used by the reporting helpers in this file.
 * Monitor() fills the single instance 'info' before it starts checking
 * arrays.
 */
struct alert_info {
	/*
	 * HOST_NAME_MAX does not include the terminating NUL, so one extra
	 * byte is needed to hold a maximum-length host name untruncated.
	 */
	char hostname[HOST_NAME_MAX + 1];
	/* recipient address for mail alerts, or NULL */
	char *mailaddr;
	/* value for the mail "From:" header, or NULL to use the default */
	char *mailfrom;
	/* program to execute for every event, or NULL */
	char *alert_cmd;
	/* non-zero: also report events through syslog */
	int dosyslog;
	/* non-zero: emit a TestMessage event for each array */
	int test;
} info;
2e0172b1
N
/* Process set-up helpers */
static int make_daemon(char *pidfile);
static int check_one_sharer(int scan);
static void write_autorebuild_pid(void);

/* Event reporting */
static void alert(const char *event, const char *dev, const char *disc);

/* Array state tracking */
static int check_array(struct state *st, struct mdstat_ent *mdstat,
		       int increments, char *prefer);
static int add_new_arrays(struct mdstat_ent *mdstat,
			  struct state **statelist);
static void free_statelist(struct state *statelist);

/* Spare sharing between arrays */
static void try_spare_migration(struct state *statelist);
static void link_containers_with_subarrays(struct state *list);

#ifndef NO_LIBUDEV
static int check_udev_activity(void);
#endif
2e0172b1 80
a655e550 81int Monitor(struct mddev_dev *devlist,
52826846 82 char *mailaddr, char *alert_cmd,
95c50205
N
83 struct context *c,
84 int daemonise, int oneshot,
85 int dosyslog, char *pidfile, int increments,
86 int share)
52826846
NB
87{
88 /*
89 * Every few seconds, scan every md device looking for changes
90 * When a change is found, log it, possibly run the alert command,
91 * and possibly send Email
92 *
93 * For each array, we record:
94 * Update time
95 * active/working/failed/spare drives
96 * State of each device.
e0d19036 97 * %rebuilt if rebuilding
52826846
NB
98 *
99 * If the update time changes, check out all the data again
100 * It is possible that we cannot get the state of each device
101 * due to bugs in the md kernel module.
e0d19036
NB
102 * We also read /proc/mdstat to get rebuild percent,
103 * and to get state on all active devices incase of kernel bug.
52826846 104 *
e0d19036
NB
105 * Events are:
106 * Fail
107 * An active device had Faulty set or Active/Sync removed
108 * FailSpare
109 * A spare device had Faulty set
110 * SpareActive
111 * An active device had a reverse transition
112 * RebuildStarted
113 * percent went from -1 to +ve
9a36a9b7
ZB
114 * RebuildNN
115 * percent went from below to not-below NN%
e0d19036
NB
116 * DeviceDisappeared
117 * Couldn't access a device which was previously visible
52826846
NB
118 *
119 * if we detect an array with active<raid and spare==0
120 * we look at other arrays that have same spare-group
121 * If we find one with active==raid and spare>0,
122 * and if we can get_disk_info and find a name
123 * Then we hot-remove and hot-add to the other array
124 *
f40ac0e7 125 * If devlist is NULL, then we can monitor everything if --scan
e0d19036
NB
126 * was given. We get an initial list from config file and add anything
127 * that appears in /proc/mdstat
52826846
NB
128 */
129
2e0172b1 130 struct state *statelist = NULL;
52826846 131 int finished = 0;
e0d19036 132 struct mdstat_ent *mdstat = NULL;
72362f18 133 char *mailfrom;
9f3dd454 134 struct mddev_ident *mdlist;
af3396da 135 int delay_for_event = c->delay;
e0d19036 136
f40ac0e7
BK
137 if (devlist && c->scan) {
138 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
139 return 1;
140 }
141
f5ff2988 142 if (!mailaddr)
8aec876d 143 mailaddr = conf_get_mailaddr();
4948b8f7 144
f5ff2988 145 if (!alert_cmd)
8aec876d 146 alert_cmd = conf_get_program();
f5ff2988
KT
147
148 mailfrom = conf_get_mailfrom();
149
95c50205 150 if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
e7b84f9d 151 pr_err("No mail address or alert command - not monitoring.\n");
d013a55e 152 return 1;
56eedc1a 153 }
f5ff2988
KT
154
155 if (c->verbose) {
156 pr_err("Monitor is started with delay %ds\n", c->delay);
157 if (mailaddr)
158 pr_err("Monitor using email address %s\n", mailaddr);
159 if (alert_cmd)
160 pr_err("Monitor using program %s\n", alert_cmd);
161 }
162
e0bd6a96
N
163 info.alert_cmd = alert_cmd;
164 info.mailaddr = mailaddr;
165 info.mailfrom = mailfrom;
166 info.dosyslog = dosyslog;
b3015166
MG
167 info.test = c->test;
168
169 if (gethostname(info.hostname, sizeof(info.hostname)) != 0) {
170 pr_err("Cannot get hostname.\n");
171 return 1;
172 }
173 info.hostname[sizeof(info.hostname) - 1] = '\0';
d013a55e 174
7f3b2d1d
BK
175 if (share){
176 if (check_one_sharer(c->scan))
177 return 1;
178 }
179
44d337f0
AC
180 if (daemonise) {
181 int rv = make_daemon(pidfile);
182 if (rv >= 0)
183 return rv;
184 }
e0d19036 185
1011e834 186 if (share)
7f3b2d1d 187 write_autorebuild_pid();
edde9560 188
e0d19036 189 if (devlist == NULL) {
9f3dd454 190 mdlist = conf_get_ident(NULL);
f566ef45 191 for (; mdlist; mdlist = mdlist->next) {
fe056d1f 192 struct state *st;
f566ef45 193
fe056d1f
N
194 if (mdlist->devname == NULL)
195 continue;
112cace6
N
196 if (strcasecmp(mdlist->devname, "<ignore>") == 0)
197 continue;
8b668d4a
LF
198 if (!is_mddev(mdlist->devname))
199 continue;
e702f392 200
503975b9 201 st = xcalloc(1, sizeof *st);
84d969be
KT
202 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"),
203 "/dev/md/%s", basename(mdlist->devname));
e0d19036 204 st->next = statelist;
4dd2df09 205 st->devnm[0] = 0;
9dad51d4 206 st->percent = RESYNC_UNKNOWN;
f1661bd7 207 st->from_config = 1;
feb716e9 208 st->expected_spares = mdlist->spare_disks;
e0d19036 209 if (mdlist->spare_group)
503975b9 210 st->spare_group = xstrdup(mdlist->spare_group);
e0d19036
NB
211 statelist = st;
212 }
213 } else {
a655e550 214 struct mddev_dev *dv;
f566ef45
JS
215
216 for (dv = devlist; dv; dv = dv->next) {
e702f392
KT
217 struct state *st;
218
8b668d4a
LF
219 if (!is_mddev(dv->devname))
220 continue;
e702f392
KT
221
222 st = xcalloc(1, sizeof *st);
9f3dd454 223 mdlist = conf_get_ident(dv->devname);
84d969be 224 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", dv->devname);
e0d19036 225 st->next = statelist;
4dd2df09 226 st->devnm[0] = 0;
9dad51d4 227 st->percent = RESYNC_UNKNOWN;
feb716e9 228 st->expected_spares = -1;
e5329c37
NB
229 if (mdlist) {
230 st->expected_spares = mdlist->spare_disks;
231 if (mdlist->spare_group)
503975b9 232 st->spare_group = xstrdup(mdlist->spare_group);
e5329c37 233 }
e0d19036
NB
234 statelist = st;
235 }
236 }
237
f566ef45 238 while (!finished) {
aa88f531 239 int new_found = 0;
73ff0732 240 struct state *st, **stp;
a90e1050 241 int anydegraded = 0;
007087d0 242 int anyredundant = 0;
e0d19036
NB
243
244 if (mdstat)
245 free_mdstat(mdstat);
f566ef45 246 mdstat = mdstat_read(oneshot ? 0 : 1, 0);
e0d19036 247
007087d0 248 for (st = statelist; st; st = st->next) {
b3015166 249 if (check_array(st, mdstat, increments, c->prefer))
a90e1050 250 anydegraded = 1;
007087d0
MT
251 /* for external arrays, metadata is filled for
252 * containers only
253 */
254 if (st->metadata && st->metadata->ss->external)
255 continue;
256 if (st->err == 0 && !anyredundant)
257 anyredundant = 1;
258 }
1011e834 259
e0d19036 260 /* now check if there are any new devices found in mdstat */
95c50205 261 if (c->scan)
b3015166 262 new_found = add_new_arrays(mdstat, &statelist);
2e0172b1 263
e0d19036
NB
264 /* If an array has active < raid && spare == 0 && spare_group != NULL
265 * Look for another array with spare > 0 and active == raid and same spare_group
49b69533 266 * if found, choose a device and hotremove/hotadd
e0d19036 267 */
a90e1050 268 if (share && anydegraded)
b3015166 269 try_spare_migration(statelist);
aa88f531
NB
270 if (!new_found) {
271 if (oneshot)
272 break;
007087d0 273 else if (!anyredundant) {
8c80d305 274 pr_err("No array with redundancy detected, stopping\n");
007087d0
MT
275 break;
276 }
e2308733 277 else {
af3396da 278#ifndef NO_LIBUDEV
cab9c67d 279 /*
af3396da
MT
280 * Wait for udevd to finish new devices
281 * processing.
cab9c67d 282 */
af3396da
MT
283 if (mdstat_wait(delay_for_event) &&
284 check_udev_activity())
49b69533 285 pr_err("Error while waiting for UDEV to complete new devices processing\n");
af3396da
MT
286#else
287 int wait_result = mdstat_wait(delay_for_event);
288 /*
289 * Give chance to process new device
290 */
291 if (wait_result != 0) {
292 if (c->delay > 5)
293 delay_for_event = 5;
294 } else
295 delay_for_event = c->delay;
296#endif
e2308733
MT
297 mdstat_close();
298 }
aa88f531 299 }
b3015166 300 info.test = 0;
73ff0732
N
301
302 for (stp = &statelist; (st = *stp) != NULL; ) {
303 if (st->from_auto && st->err > 5) {
304 *stp = st->next;
55c10e4d
PB
305 if (st->spare_group)
306 free(st->spare_group);
307
73ff0732
N
308 free(st);
309 } else
310 stp = &st->next;
311 }
52826846 312 }
55c10e4d
PB
313
314 free_statelist(statelist);
b657208c 315
b5e64645
NB
316 if (pidfile)
317 unlink(pidfile);
52826846
NB
318 return 0;
319}
320
2e0172b1
N
static int make_daemon(char *pidfile)
{
	/* Fork into the background.
	 *
	 * Return:
	 * -1 in the forked daemon
	 *  0 in the parent
	 *  1 on error
	 * so a non-negative becomes the exit code.
	 *
	 * The parent reports the daemon's pid: into 'pidfile' when one is
	 * given, otherwise on stdout.
	 */
	int pid = fork();

	if (pid < 0) {
		perror("daemonise");
		return 1;
	}

	if (pid > 0) {
		if (!pidfile) {
			printf("%d\n", pid);
		} else {
			FILE *pid_file = NULL;
			int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC,
				      0644);

			if (fd >= 0)
				pid_file = fdopen(fd, "w");
			if (!pid_file) {
				perror("cannot create pid file");
				/*
				 * If only fdopen() failed, the descriptor is
				 * still open and must not be leaked.
				 */
				if (fd >= 0)
					close(fd);
			} else {
				fprintf(pid_file, "%d\n", pid);
				fclose(pid_file);
			}
		}
		return 0;
	}

	/* Child: become the daemon */
	manage_fork_fds(0);
	setsid();
	return -1;
}
356
357static int check_one_sharer(int scan)
358{
185ec439
CL
359 int pid;
360 FILE *comm_fp;
2e0172b1 361 FILE *fp;
7f3b2d1d
BK
362 char comm_path[PATH_MAX];
363 char path[PATH_MAX];
185ec439
CL
364 char comm[20];
365
0011874f
JS
366 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
367 fp = fopen(path, "r");
2e0172b1 368 if (fp) {
71204a50
N
369 if (fscanf(fp, "%d", &pid) != 1)
370 pid = -1;
185ec439
CL
371 snprintf(comm_path, sizeof(comm_path),
372 "/proc/%d/comm", pid);
373 comm_fp = fopen(comm_path, "r");
374 if (comm_fp) {
1c66260d 375 if (fscanf(comm_fp, "%19s", comm) &&
185ec439
CL
376 strncmp(basename(comm), Name, strlen(Name)) == 0) {
377 if (scan) {
378 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
379 fclose(comm_fp);
380 fclose(fp);
381 return 1;
382 } else {
383 pr_err("Warning: One autorebuild process already running.\n");
384 }
2e0172b1 385 }
185ec439 386 fclose(comm_fp);
2e0172b1
N
387 }
388 fclose(fp);
389 }
7f3b2d1d
BK
390 return 0;
391}
392
393static void write_autorebuild_pid()
394{
395 char path[PATH_MAX];
396 int pid;
ca4b156b 397 FILE *fp = NULL;
7f3b2d1d
BK
398 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
399
ca4b156b 400 if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
7f3b2d1d
BK
401 pr_err("Can't create autorebuild.pid file\n");
402 } else {
ca4b156b
MT
403 int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700);
404
405 if (fd >= 0)
406 fp = fdopen(fd, "w");
407
7f3b2d1d 408 if (!fp)
7a862a02 409 pr_err("Can't create autorebuild.pid file\n");
7f3b2d1d
BK
410 else {
411 pid = getpid();
412 fprintf(fp, "%d\n", pid);
413 fclose(fp);
2e0172b1
N
414 }
415 }
2e0172b1 416}
52826846 417
b3015166 418static void execute_alert_cmd(const char *event, const char *dev, const char *disc)
36988671
MG
419{
420 int pid = fork();
421
422 switch (pid) {
423 default:
424 waitpid(pid, NULL, 0);
425 break;
426 case -1:
427 pr_err("Cannot fork to execute alert command");
428 break;
429 case 0:
b3015166 430 execl(info.alert_cmd, info.alert_cmd, event, dev, disc, NULL);
36988671
MG
431 exit(2);
432 }
433}
434
b3015166 435static void send_event_email(const char *event, const char *dev, const char *disc)
36988671
MG
436{
437 FILE *mp, *mdstat;
36988671
MG
438 char buf[BUFSIZ];
439 int n;
440
441 mp = popen(Sendmail, "w");
442 if (!mp) {
443 pr_err("Cannot open pipe stream for sendmail.\n");
444 return;
445 }
446
36988671 447 signal(SIGPIPE, SIG_IGN);
b3015166
MG
448 if (info.mailfrom)
449 fprintf(mp, "From: %s\n", info.mailfrom);
36988671
MG
450 else
451 fprintf(mp, "From: %s monitoring <root>\n", Name);
b3015166
MG
452 fprintf(mp, "To: %s\n", info.mailaddr);
453 fprintf(mp, "Subject: %s event on %s:%s\n\n", event, dev, info.hostname);
36988671
MG
454 fprintf(mp, "This is an automatically generated mail message. \n");
455 fprintf(mp, "A %s event had been detected on md device %s.\n\n", event, dev);
456
457 if (disc && disc[0] != ' ')
458 fprintf(mp,
459 "It could be related to component device %s.\n\n", disc);
460 if (disc && disc[0] == ' ')
461 fprintf(mp, "Extra information:%s.\n\n", disc);
462
463 mdstat = fopen("/proc/mdstat", "r");
464 if (!mdstat) {
465 pr_err("Cannot open /proc/mdstat\n");
466 pclose(mp);
467 return;
468 }
469
470 fprintf(mp, "The /proc/mdstat file currently contains the following:\n\n");
471 while ((n = fread(buf, 1, sizeof(buf), mdstat)) > 0)
472 n = fwrite(buf, 1, n, mp);
473 fclose(mdstat);
474 pclose(mp);
475}
476
/*
 * Map an event name to the syslog severity it is reported with.
 * Only the leading characters of the name are compared.
 */
static int syslog_priority_for_event(const char *event)
{
	/* These are the critical events: */
	if (strncmp(event, "Fail", 4) == 0 ||
	    strncmp(event, "Degrade", 7) == 0 ||
	    strncmp(event, "DeviceDisappeared", 17) == 0)
		return LOG_CRIT;

	/*
	 * Good to know about, but are not failures.  The "Spares" test must
	 * be a match (== 0); testing for a mismatch would put every
	 * remaining event in this class and leave LOG_INFO to Spares* only.
	 */
	if (strncmp(event, "Rebuild", 7) == 0 ||
	    strncmp(event, "MoveSpare", 9) == 0 ||
	    strncmp(event, "Spares", 6) == 0)
		return LOG_WARNING;

	/* Everything else: */
	return LOG_INFO;
}

/*
 * Report 'event' on md device 'dev' through syslog, at a severity that
 * depends on the event.  'disc' is optional: text starting with a space
 * is appended as extra detail, anything else is named as the component
 * device.
 */
static void log_event_to_syslog(const char *event, const char *dev, const char *disc)
{
	int priority = syslog_priority_for_event(event);

	if (disc && disc[0] != ' ')
		syslog(priority,
		       "%s event detected on md device %s, component device %s", event, dev, disc);
	else if (disc)
		syslog(priority, "%s event detected on md device %s: %s", event, dev, disc);
	else
		syslog(priority, "%s event detected on md device %s", event, dev);
}
773135f5 504
b3015166 505static void alert(const char *event, const char *dev, const char *disc)
36988671 506{
b3015166 507 if (!info.alert_cmd && !info.mailaddr && !info.dosyslog) {
cd29a5c8 508 time_t now = time(0);
aba69144 509
f566ef45
JS
510 printf("%1.15s: %s on %s %s\n", ctime(&now) + 4,
511 event, dev, disc?disc:"unknown device");
cd29a5c8 512 }
b3015166
MG
513 if (info.alert_cmd)
514 execute_alert_cmd(event, dev, disc);
36988671 515
b3015166 516 if (info.mailaddr && (strncmp(event, "Fail", 4) == 0 ||
f566ef45
JS
517 strncmp(event, "Test", 4) == 0 ||
518 strncmp(event, "Spares", 6) == 0 ||
519 strncmp(event, "Degrade", 7) == 0)) {
b3015166 520 send_event_email(event, dev, disc);
52826846 521 }
773135f5 522
b3015166 523 if (info.dosyslog)
36988671 524 log_event_to_syslog(event, dev, disc);
52826846 525}
b90c0e9a 526
a90e1050 527static int check_array(struct state *st, struct mdstat_ent *mdstat,
c2ecf5f6 528 int increments, char *prefer)
2e0172b1 529{
ff044d6b
AC
530 /* Update the state 'st' to reflect any changes shown in mdstat,
531 * or found by directly examining the array, and return
532 * '1' if the array is degraded, or '0' if it is optimal (or dead).
533 */
b3015166 534 struct { int state, major, minor; } disks_info[MAX_DISKS];
aed5f5c3 535 struct mdinfo *sra = NULL;
2e0172b1
N
536 mdu_array_info_t array;
537 struct mdstat_ent *mse = NULL, *mse2;
538 char *dev = st->devname;
b9a0309c 539 int fd;
2e0172b1 540 int i;
9e6d9291
N
541 int remaining_disks;
542 int last_disk;
721b662b 543 int new_array = 0;
1830e74b 544 int retval;
802961a2 545 int is_container = 0;
b3ab4e4d 546 unsigned long redundancy_only_flags = 0;
2e0172b1 547
b3015166
MG
548 if (info.test)
549 alert("TestMessage", dev, NULL);
b9a0309c 550
1830e74b
JS
551 retval = 0;
552
2e0172b1 553 fd = open(dev, O_RDONLY);
13e5d845
JS
554 if (fd < 0)
555 goto disappeared;
b9a0309c 556
802961a2 557 if (st->devnm[0] == 0)
84d969be 558 snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
802961a2
MT
559
560 for (mse2 = mdstat; mse2; mse2 = mse2->next)
561 if (strcmp(mse2->devnm, st->devnm) == 0) {
562 mse2->devnm[0] = 0; /* flag it as "used" */
563 mse = mse2;
564 }
565
566 if (!mse) {
567 /* duplicated array in statelist
568 * or re-created after reading mdstat
569 */
570 st->err++;
571 goto out;
572 }
573
574 if (mse->level == NULL)
575 is_container = 1;
576
2dab69c9 577 if (!is_container && !md_array_active(fd))
13e5d845 578 goto disappeared;
b9a0309c 579
2e0172b1 580 fcntl(fd, F_SETFD, FD_CLOEXEC);
13e5d845
JS
581 if (md_get_array_info(fd, &array) < 0)
582 goto disappeared;
583
b3ab4e4d
MT
584 if (!is_container && map_name(pers, mse->level) > 0)
585 redundancy_only_flags |= GET_MISMATCH;
802961a2
MT
586
587 sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
b3ab4e4d 588 GET_STATE | redundancy_only_flags);
aed5f5c3 589
aed5f5c3
JS
590 if (!sra)
591 goto disappeared;
592
2e0172b1
N
593 /* It's much easier to list what array levels can't
594 * have a device disappear than all of them that can
595 */
48bc2ade 596 if (sra->array.level == 0 || sra->array.level == -1) {
f1661bd7 597 if (!st->err && !st->from_config)
b3015166 598 alert("DeviceDisappeared", dev, " Wrong-Level");
73ff0732 599 st->err++;
1830e74b 600 goto out;
2e0172b1 601 }
2e0172b1 602
2e0172b1
N
603 /* this array is in /proc/mdstat */
604 if (array.utime == 0)
605 /* external arrays don't update utime, so
606 * just make sure it is always different. */
607 array.utime = st->utime + 1;;
608
0f760384 609 if (st->err) {
73ff0732 610 /* New array appeared where previously had an error */
0f760384
N
611 st->err = 0;
612 st->percent = RESYNC_NONE;
721b662b 613 new_array = 1;
007087d0 614 if (!is_container)
b3015166 615 alert("NewArray", st->devname, NULL);
0f760384
N
616 }
617
b8e5713c 618 if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
e5eb6857 619 st->working == sra->array.working_disks &&
b98943a4 620 st->spare == sra->array.spare_disks &&
d7be7d87 621 (mse == NULL || (mse->percent == st->percent))) {
ff044d6b 622 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
623 retval = 1;
624 goto out;
2e0172b1
N
625 }
626 if (st->utime == 0 && /* new array */
f27904a5 627 mse->pattern && strchr(mse->pattern, '_') /* degraded */)
b3015166 628 alert("DegradedArray", dev, NULL);
2e0172b1 629
b98943a4
JS
630 if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
631 sra->array.spare_disks < st->expected_spares)
b3015166 632 alert("SparesMissing", dev, NULL);
9dad51d4 633 if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
2e0172b1 634 mse->percent >= 0)
b3015166 635 alert("RebuildStarted", dev, NULL);
f27904a5 636 if (st->percent >= 0 && mse->percent >= 0 &&
2e0172b1 637 (mse->percent / increments) > (st->percent / increments)) {
97589839 638 char percentalert[18];
f27904a5
JS
639 /*
640 * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
641 */
2e0172b1
N
642
643 if((mse->percent / increments) == 0)
f27904a5
JS
644 snprintf(percentalert, sizeof(percentalert),
645 "RebuildStarted");
2e0172b1 646 else
f27904a5
JS
647 snprintf(percentalert, sizeof(percentalert),
648 "Rebuild%02d", mse->percent);
2e0172b1 649
b3015166 650 alert(percentalert, dev, NULL);
2e0172b1
N
651 }
652
f27904a5 653 if (mse->percent == RESYNC_NONE && st->percent >= 0) {
2e0172b1
N
654 /* Rebuild/sync/whatever just finished.
655 * If there is a number in /mismatch_cnt,
656 * we should report that.
657 */
2e0172b1 658 if (sra && sra->mismatch_cnt > 0) {
8453f8d0
LD
659 char cnt[80];
660 snprintf(cnt, sizeof(cnt),
661 " mismatches found: %d (on raid level %d)",
f566ef45 662 sra->mismatch_cnt, sra->array.level);
b3015166 663 alert("RebuildFinished", dev, cnt);
2e0172b1 664 } else
b3015166 665 alert("RebuildFinished", dev, NULL);
2e0172b1
N
666 }
667 st->percent = mse->percent;
668
b98943a4 669 remaining_disks = sra->array.nr_disks;
f27904a5 670 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
2e0172b1
N
671 mdu_disk_info_t disc;
672 disc.number = i;
d97572f5 673 if (md_get_disk_info(fd, &disc) >= 0) {
b3015166
MG
674 disks_info[i].state = disc.state;
675 disks_info[i].major = disc.major;
676 disks_info[i].minor = disc.minor;
9e6d9291
N
677 if (disc.major || disc.minor)
678 remaining_disks --;
2e0172b1 679 } else
b3015166 680 disks_info[i].major = disks_info[i].minor = 0;
2e0172b1 681 }
9e6d9291 682 last_disk = i;
2e0172b1 683
4019ad07
JL
684 if (mse->metadata_version &&
685 strncmp(mse->metadata_version, "external:", 9) == 0 &&
4dd2df09
N
686 is_subarray(mse->metadata_version+9)) {
687 char *sl;
84d969be 688 snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
4dd2df09
N
689 sl = strchr(st->parent_devnm, '/');
690 if (sl)
691 *sl = 0;
692 } else
693 st->parent_devnm[0] = 0;
f27904a5 694 if (st->metadata == NULL && st->parent_devnm[0] == 0)
2e0172b1
N
695 st->metadata = super_by_fd(fd, NULL);
696
f566ef45
JS
697 for (i = 0; i < MAX_DISKS; i++) {
698 mdu_disk_info_t disc = {0, 0, 0, 0, 0};
699 int newstate = 0;
2e0172b1
N
700 int change;
701 char *dv = NULL;
702 disc.number = i;
b3015166
MG
703 if (i < last_disk && (disks_info[i].major || disks_info[i].minor)) {
704 newstate = disks_info[i].state;
705 dv = map_dev_preferred(disks_info[i].major, disks_info[i].minor, 1,
f27904a5 706 prefer);
2e0172b1 707 disc.state = newstate;
b3015166
MG
708 disc.major = disks_info[i].major;
709 disc.minor = disks_info[i].minor;
721b662b
N
710 } else
711 newstate = (1 << MD_DISK_REMOVED);
712
2e0172b1 713 if (dv == NULL && st->devid[i])
f27904a5
JS
714 dv = map_dev_preferred(major(st->devid[i]),
715 minor(st->devid[i]), 1, prefer);
2e0172b1 716 change = newstate ^ st->devstate[i];
721b662b 717 if (st->utime && change && !st->err && !new_array) {
f27904a5 718 if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
b3015166 719 alert("Fail", dev, dv);
f27904a5 720 else if ((newstate & (1 << MD_DISK_FAULTY)) &&
2e0172b1 721 (disc.major || disc.minor) &&
f27904a5
JS
722 st->devid[i] == makedev(disc.major,
723 disc.minor))
b3015166 724 alert("FailSpare", dev, dv);
f27904a5 725 else if ((newstate&change) & (1 << MD_DISK_SYNC))
b3015166 726 alert("SpareActive", dev, dv);
2e0172b1
N
727 }
728 st->devstate[i] = newstate;
729 st->devid[i] = makedev(disc.major, disc.minor);
730 }
b98943a4 731 st->active = sra->array.active_disks;
e5eb6857 732 st->working = sra->array.working_disks;
b98943a4 733 st->spare = sra->array.spare_disks;
b8e5713c 734 st->failed = sra->array.failed_disks;
2e0172b1 735 st->utime = array.utime;
12a9d21f 736 st->raid = sra->array.raid_disks;
2e0172b1 737 st->err = 0;
a90e1050 738 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
739 retval = 1;
740
741 out:
aed5f5c3
JS
742 if (sra)
743 sysfs_free(sra);
802961a2 744 if (fd >= 0)
13e5d845 745 close(fd);
1830e74b 746 return retval;
13e5d845
JS
747
748 disappeared:
007087d0 749 if (!st->err && !is_container)
b3015166 750 alert("DeviceDisappeared", dev, NULL);
13e5d845
JS
751 st->err++;
752 goto out;
2e0172b1
N
753}
754
b3015166 755static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
2e0172b1
N
756{
757 struct mdstat_ent *mse;
758 int new_found = 0;
1e08717f 759 char *name;
2e0172b1 760
f566ef45 761 for (mse = mdstat; mse; mse = mse->next)
d7be7d87 762 if (mse->devnm[0] && (!mse->level || /* retrieve containers */
f566ef45
JS
763 (strcmp(mse->level, "raid0") != 0 &&
764 strcmp(mse->level, "linear") != 0))) {
503975b9 765 struct state *st = xcalloc(1, sizeof *st);
2e0172b1
N
766 mdu_array_info_t array;
767 int fd;
1e08717f
SV
768
769 name = get_md_name(mse->devnm);
770 if (!name) {
771 free(st);
772 continue;
773 }
774
84d969be 775 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", name);
2e0172b1 776 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
9cd39f01 777 md_get_array_info(fd, &array) < 0) {
2e0172b1 778 /* no such array */
9cd39f01
JS
779 if (fd >= 0)
780 close(fd);
2e0172b1 781 put_md_name(st->devname);
2e0172b1
N
782 if (st->metadata) {
783 st->metadata->ss->free_super(st->metadata);
784 free(st->metadata);
785 }
786 free(st);
787 continue;
788 }
789 close(fd);
83f3bc5f 790 st->next = *statelist;
2e0172b1 791 st->err = 1;
73ff0732 792 st->from_auto = 1;
84d969be 793 snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
9dad51d4 794 st->percent = RESYNC_UNKNOWN;
2e0172b1 795 st->expected_spares = -1;
eb28e119 796 if (mse->metadata_version &&
f566ef45
JS
797 strncmp(mse->metadata_version,
798 "external:", 9) == 0 &&
4dd2df09
N
799 is_subarray(mse->metadata_version+9)) {
800 char *sl;
84d969be
KT
801 snprintf(st->parent_devnm, MD_NAME_MAX,
802 "%s", mse->metadata_version + 10);
4dd2df09
N
803 sl = strchr(st->parent_devnm, '/');
804 *sl = 0;
805 } else
806 st->parent_devnm[0] = 0;
83f3bc5f 807 *statelist = st;
b3015166
MG
808 if (info.test)
809 alert("TestMessage", st->devname, NULL);
2e0172b1
N
810 new_found = 1;
811 }
812 return new_found;
813}
814
fbfdcb06
AO
815static int get_required_spare_criteria(struct state *st,
816 struct spare_criteria *sc)
80e7f8c3
AC
817{
818 int fd;
80e7f8c3 819
f566ef45 820 if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
fbfdcb06 821 sc->min_size = 0;
4b57ecf6 822 sc->sector_size = 0;
de697acc 823 return 0;
300f5033 824 }
80e7f8c3
AC
825
826 fd = open(st->devname, O_RDONLY);
827 if (fd < 0)
de697acc
AC
828 return 1;
829 if (st->metadata->ss->external)
830 st->metadata->ss->load_container(st->metadata, fd, st->devname);
831 else
832 st->metadata->ss->load_super(st->metadata, fd, st->devname);
80e7f8c3 833 close(fd);
de697acc
AC
834 if (!st->metadata->sb)
835 return 1;
fbfdcb06
AO
836
837 st->metadata->ss->get_spare_criteria(st->metadata, sc);
80e7f8c3
AC
838 st->metadata->ss->free_super(st->metadata);
839
de697acc 840 return 0;
80e7f8c3
AC
841}
842
5ec0f373 843static int check_donor(struct state *from, struct state *to)
2feb22ef 844{
66f5c4b6
N
845 struct state *sub;
846
2feb22ef
N
847 if (from == to)
848 return 0;
66f5c4b6
N
849 if (from->parent)
850 /* Cannot move from a member */
2feb22ef 851 return 0;
ff044d6b
AC
852 if (from->err)
853 return 0;
66f5c4b6
N
854 for (sub = from->subarray; sub; sub = sub->subarray)
855 /* If source array has degraded subarrays, don't
856 * remove anything
857 */
858 if (sub->active < sub->raid)
859 return 0;
860 if (from->metadata->ss->external == 0)
861 if (from->active < from->raid)
862 return 0;
2feb22ef
N
863 if (from->spare <= 0)
864 return 0;
e78dda3b 865 return 1;
2feb22ef
N
866}
867
0f0749ad 868static dev_t choose_spare(struct state *from, struct state *to,
f566ef45 869 struct domainlist *domlist, struct spare_criteria *sc)
0fa21e85
N
870{
871 int d;
0f0749ad 872 dev_t dev = 0;
0fa21e85 873
b0599bda 874 for (d = from->raid; !dev && d < MAX_DISKS; d++) {
f566ef45 875 if (from->devid[d] > 0 && from->devstate[d] == 0) {
0fa21e85
N
876 struct dev_policy *pol;
877 unsigned long long dev_size;
4b57ecf6 878 unsigned int dev_sector_size;
0fa21e85 879
bfd76b93
CA
880 if (to->metadata->ss->external &&
881 test_partition_from_id(from->devid[d]))
882 continue;
883
fbfdcb06 884 if (sc->min_size &&
0fa21e85 885 dev_size_from_id(from->devid[d], &dev_size) &&
fbfdcb06 886 dev_size < sc->min_size)
0fa21e85
N
887 continue;
888
4b57ecf6
AO
889 if (sc->sector_size &&
890 dev_sector_size_from_id(from->devid[d],
891 &dev_sector_size) &&
892 sc->sector_size != dev_sector_size)
893 continue;
894
4dd2df09 895 pol = devid_policy(from->devid[d]);
0fa21e85
N
896 if (from->spare_group)
897 pol_add(&pol, pol_domain,
898 from->spare_group, NULL);
f566ef45
JS
899 if (domain_test(domlist, pol,
900 to->metadata->ss->name) == 1)
0fa21e85
N
901 dev = from->devid[d];
902 dev_policy_free(pol);
903 }
904 }
905 return dev;
906}
907
0f0749ad 908static dev_t container_choose_spare(struct state *from, struct state *to,
f0b85306 909 struct domainlist *domlist,
fbfdcb06 910 struct spare_criteria *sc, int active)
5739e0d0
N
911{
912 /* This is similar to choose_spare, but we cannot trust devstate,
913 * so we need to read the metadata instead
914 */
326727d9 915 struct mdinfo *list;
5739e0d0 916 struct supertype *st = from->metadata;
ff044d6b 917 int fd = open(from->devname, O_RDONLY);
5739e0d0 918 int err;
0f0749ad 919 dev_t dev = 0;
5739e0d0
N
920
921 if (fd < 0)
922 return 0;
326727d9
AC
923 if (!st->ss->getinfo_super_disks) {
924 close(fd);
5739e0d0 925 return 0;
326727d9 926 }
1011e834 927
5739e0d0
N
928 err = st->ss->load_container(st, fd, NULL);
929 close(fd);
930 if (err)
931 return 0;
1011e834 932
a1e49d69
CA
933 if (from == to) {
934 /* We must check if number of active disks has not increased
935 * since ioctl in main loop. mdmon may have added spare
936 * to subarray. If so we do not need to look for more spares
937 * so return non zero value */
938 int active_cnt = 0;
939 struct mdinfo *dp;
940 list = st->ss->getinfo_super_disks(st);
941 if (!list) {
942 st->ss->free_super(st);
943 return 1;
944 }
945 dp = list->devs;
946 while (dp) {
f566ef45
JS
947 if (dp->disk.state & (1 << MD_DISK_SYNC) &&
948 !(dp->disk.state & (1 << MD_DISK_FAULTY)))
a1e49d69
CA
949 active_cnt++;
950 dp = dp->next;
951 }
952 sysfs_free(list);
953 if (active < active_cnt) {
954 /* Spare just activated.*/
955 st->ss->free_super(st);
956 return 1;
957 }
958 }
959
326727d9 960 /* We only need one spare so full list not needed */
fbfdcb06 961 list = container_choose_spares(st, sc, domlist, from->spare_group,
326727d9
AC
962 to->metadata->ss->name, 1);
963 if (list) {
964 struct mdinfo *disks = list->devs;
965 if (disks)
966 dev = makedev(disks->disk.major, disks->disk.minor);
967 sysfs_free(list);
5739e0d0 968 }
326727d9 969 st->ss->free_super(st);
5739e0d0
N
970 return dev;
971}
972
b3015166 973static void try_spare_migration(struct state *statelist)
2e0172b1 974{
66f5c4b6
N
975 struct state *from;
976 struct state *st;
fbfdcb06 977 struct spare_criteria sc;
c3621c0a
ML
978
979 link_containers_with_subarrays(statelist);
66f5c4b6 980 for (st = statelist; st; st = st->next)
f566ef45 981 if (st->active < st->raid && st->spare == 0 && !st->err) {
e78dda3b
N
982 struct domainlist *domlist = NULL;
983 int d;
66f5c4b6
N
984 struct state *to = st;
985
4dd2df09 986 if (to->parent_devnm[0] && !to->parent)
c0dc0ad5
CA
987 /* subarray monitored without parent container
988 * we can't move spares here */
989 continue;
1011e834 990
66f5c4b6
N
991 if (to->parent)
992 /* member of a container */
993 to = to->parent;
e78dda3b 994
fbfdcb06 995 if (get_required_spare_criteria(to, &sc))
de697acc 996 continue;
e9a2ac02
AC
997 if (to->metadata->ss->external) {
998 /* We must make sure there is
999 * no suitable spare in container already.
1000 * If there is we don't add more */
1001 dev_t devid = container_choose_spare(
fbfdcb06 1002 to, to, NULL, &sc, st->active);
e9a2ac02
AC
1003 if (devid > 0)
1004 continue;
1005 }
b0599bda 1006 for (d = 0; d < MAX_DISKS; d++)
e78dda3b
N
1007 if (to->devid[d])
1008 domainlist_add_dev(&domlist,
1009 to->devid[d],
1010 to->metadata->ss->name);
1011 if (to->spare_group)
1012 domain_add(&domlist, to->spare_group);
5ec0f373
ML
1013 /*
1014 * No spare migration if the destination
1015 * has no domain. Skip this array.
1016 */
1017 if (!domlist)
1018 continue;
0fa21e85 1019 for (from=statelist ; from ; from=from->next) {
0f0749ad 1020 dev_t devid;
5ec0f373 1021 if (!check_donor(from, to))
0fa21e85 1022 continue;
5739e0d0
N
1023 if (from->metadata->ss->external)
1024 devid = container_choose_spare(
fbfdcb06 1025 from, to, domlist, &sc, 0);
5739e0d0 1026 else
f0b85306 1027 devid = choose_spare(from, to, domlist,
fbfdcb06 1028 &sc);
f566ef45
JS
1029 if (devid > 0 &&
1030 move_spare(from->devname, to->devname,
1031 devid)) {
b3015166 1032 alert("MoveSpare", to->devname, from->devname);
d52bb542
AC
1033 break;
1034 }
0fa21e85 1035 }
e78dda3b 1036 domain_free(domlist);
2e0172b1
N
1037 }
1038}
c3621c0a
ML
1039
1040/* search the statelist to connect external
1041 * metadata subarrays with their containers
1042 * We always completely rebuild the tree from scratch as
1043 * that is safest considering the possibility of entries
1044 * disappearing or changing.
1045 */
1046static void link_containers_with_subarrays(struct state *list)
1047{
1048 struct state *st;
1049 struct state *cont;
1050 for (st = list; st; st = st->next) {
1051 st->parent = NULL;
1052 st->subarray = NULL;
1053 }
1054 for (st = list; st; st = st->next)
4dd2df09 1055 if (st->parent_devnm[0])
c3621c0a 1056 for (cont = list; cont; cont = cont->next)
f566ef45 1057 if (!cont->err && cont->parent_devnm[0] == 0 &&
4dd2df09 1058 strcmp(cont->devnm, st->parent_devnm) == 0) {
c3621c0a
ML
1059 st->parent = cont;
1060 st->subarray = cont->subarray;
1061 cont->subarray = st;
1062 break;
1063 }
1064}
1065
55c10e4d
PB
1066/**
1067 * free_statelist() - Frees statelist.
1068 * @statelist: statelist to free
1069 */
1070static void free_statelist(struct state *statelist)
1071{
1072 struct state *tmp = NULL;
1073
1074 while (statelist) {
1075 if (statelist->spare_group)
1076 free(statelist->spare_group);
1077
1078 tmp = statelist;
1079 statelist = statelist->next;
1080 free(tmp);
1081 }
1082}
1083
af3396da 1084#ifndef NO_LIBUDEV
49b69533
OS
/* function: check_udev_activity
 * Description: Waits for udev to finish processing its event queue,
 *	polling once per second and giving up once the retry budget
 *	(30 retries) is used up.
 * Returns:
 *	1 - error while creating the udev context or the udev queue
 *	2 - timeout, the queue was still busy
 *	0 - successful completion (also when udev is not in use)
 */
static int check_udev_activity(void)
{
	struct udev *ctx;
	struct udev_queue *queue;
	int retries_left = 30;
	int rc = 0;

	/*
	 * In rare cases the system may not have udev,
	 * in such cases just exit with rc 0
	 */
	if (!use_udev())
		return 0;

	ctx = udev_new();
	if (!ctx)
		return 1;

	queue = udev_queue_new(ctx);
	if (!queue) {
		rc = 1;
		goto release;
	}

	if (udev_queue_get_queue_is_empty(queue))
		goto release;

	while (!udev_queue_get_queue_is_empty(queue)) {
		sleep(1);

		if (retries_left == 0) {
			rc = 2;
			break;
		}
		retries_left--;
	}

release:
	if (queue)
		udev_queue_unref(queue);
	udev_unref(ctx);
	return rc;
}
af3396da 1140#endif
49b69533 1141
b90c0e9a
NB
1142/* Not really Monitor but ... */
1143int Wait(char *dev)
1144{
4dd2df09 1145 char devnm[32];
cb91230c
TM
1146 dev_t rdev;
1147 char *tmp;
b90c0e9a 1148 int rv = 1;
d3f6cf4f 1149 int frozen_remaining = 3;
b90c0e9a 1150
cb91230c 1151 if (!stat_is_blkdev(dev, &rdev))
b90c0e9a 1152 return 2;
cb91230c
TM
1153
1154 tmp = devid2devnm(rdev);
1155 if (!tmp) {
1156 pr_err("Cannot get md device name.\n");
1157 return 2;
1158 }
1159
1160 strcpy(devnm, tmp);
b90c0e9a
NB
1161
1162 while(1) {
1163 struct mdstat_ent *ms = mdstat_read(1, 0);
1164 struct mdstat_ent *e;
1165
f566ef45 1166 for (e = ms; e; e = e->next)
4dd2df09 1167 if (strcmp(e->devnm, devnm) == 0)
b90c0e9a
NB
1168 break;
1169
d3f6cf4f 1170 if (e && e->percent == RESYNC_NONE) {
276be514
N
1171 /* We could be in the brief pause before something
1172 * starts. /proc/mdstat doesn't show that, but
1173 * sync_action does.
1174 */
1175 struct mdinfo mdi;
1176 char buf[21];
dae13137
JS
1177
1178 if (sysfs_init(&mdi, -1, devnm))
1179 return 2;
276be514
N
1180 if (sysfs_get_str(&mdi, NULL, "sync_action",
1181 buf, 20) > 0 &&
d3f6cf4f 1182 strcmp(buf,"idle\n") != 0) {
276be514 1183 e->percent = RESYNC_UNKNOWN;
d3f6cf4f
N
1184 if (strcmp(buf, "frozen\n") == 0) {
1185 if (frozen_remaining == 0)
1186 e->percent = RESYNC_NONE;
1187 else
1188 frozen_remaining -= 1;
1189 }
1190 }
276be514 1191 }
639c3c10 1192 if (!e || e->percent == RESYNC_NONE) {
e7783ee6 1193 if (e && e->metadata_version &&
c94709e8
DW
1194 strncmp(e->metadata_version, "external:", 9) == 0) {
1195 if (is_subarray(&e->metadata_version[9]))
1196 ping_monitor(&e->metadata_version[9]);
1197 else
4dd2df09 1198 ping_monitor(devnm);
c94709e8 1199 }
b90c0e9a
NB
1200 free_mdstat(ms);
1201 return rv;
1202 }
89a10d84 1203 free_mdstat(ms);
b90c0e9a
NB
1204 rv = 0;
1205 mdstat_wait(5);
1206 }
1207}
679eb882 1208
43ebc910
GP
/* NULL-terminated list of array_state values that WaitClean() accepts as
 * "clean".
 * The state "broken" is used only for RAID0/LINEAR - it's the same as
 * "clean", but used in case the array has one or more members missing.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
679eb882 1214
1ea04629 1215int WaitClean(char *dev, int verbose)
679eb882
N
1216{
1217 int fd;
1218 struct mdinfo *mdi;
1219 int rv = 1;
4dd2df09 1220 char devnm[32];
679eb882 1221
9e04ac1c
ZL
1222 if (!stat_is_blkdev(dev, NULL))
1223 return 2;
679eb882
N
1224 fd = open(dev, O_RDONLY);
1225 if (fd < 0) {
1226 if (verbose)
e7b84f9d 1227 pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
679eb882
N
1228 return 1;
1229 }
1230
4dd2df09
N
1231 strcpy(devnm, fd2devnm(fd));
1232 mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
679eb882
N
1233 if (!mdi) {
1234 if (verbose)
7a862a02 1235 pr_err("Failed to read sysfs attributes for %s\n", dev);
679eb882
N
1236 close(fd);
1237 return 0;
1238 }
1239
1240 switch(mdi->array.level) {
1241 case LEVEL_LINEAR:
1242 case LEVEL_MULTIPATH:
1243 case 0:
1244 /* safemode delay is irrelevant for these levels */
1245 rv = 0;
1246 }
1247
1248 /* for internal metadata the kernel handles the final clean
1249 * transition, containers can never be dirty
1250 */
1251 if (!is_subarray(mdi->text_version))
1252 rv = 0;
1253
1254 /* safemode disabled ? */
1255 if (mdi->safe_mode_delay == 0)
1256 rv = 0;
1257
1258 if (rv) {
4dd2df09 1259 int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
679eb882 1260 char buf[20];
efc67e8e 1261 int delay = 5000;
679eb882
N
1262
1263 /* minimize the safe_mode_delay and prepare to wait up to 5s
1264 * for writes to quiesce
1265 */
1266 sysfs_set_safemode(mdi, 1);
679eb882
N
1267
1268 /* wait for array_state to be clean */
1269 while (1) {
1270 rv = read(state_fd, buf, sizeof(buf));
1271 if (rv < 0)
1272 break;
43ebc910
GP
1273 if (sysfs_match_word(buf, clean_states) <
1274 (int)ARRAY_SIZE(clean_states) - 1)
679eb882 1275 break;
efc67e8e 1276 rv = sysfs_wait(state_fd, &delay);
679eb882
N
1277 if (rv < 0 && errno != EINTR)
1278 break;
1279 lseek(state_fd, 0, SEEK_SET);
1280 }
1281 if (rv < 0)
1282 rv = 1;
1ea04629 1283 else if (ping_monitor(mdi->text_version) == 0) {
679eb882
N
1284 /* we need to ping to close the window between array
1285 * state transitioning to clean and the metadata being
1286 * marked clean
1287 */
1288 rv = 0;
1ea04629 1289 } else {
679eb882 1290 rv = 1;
1ea04629
MT
1291 pr_err("Error connecting monitor with %s\n", dev);
1292 }
679eb882 1293 if (rv && verbose)
f566ef45 1294 pr_err("Error waiting for %s to be clean\n", dev);
679eb882
N
1295
1296 /* restore the original safe_mode_delay */
1297 sysfs_set_safemode(mdi, mdi->safe_mode_delay);
1298 close(state_fd);
1299 }
1300
1301 sysfs_free(mdi);
1302 close(fd);
1303
1304 return rv;
1305}