]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Monitor.c
Mdmonitor: Pass events to alert() using enums instead of strings
[thirdparty/mdadm.git] / Monitor.c
CommitLineData
52826846 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
52826846 3 *
e736b623 4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
52826846
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
52826846
NB
23 */
24
9a9dab36 25#include "mdadm.h"
52826846
NB
26#include "md_p.h"
27#include "md_u.h"
e0d19036 28#include <sys/wait.h>
4450e59f 29#include <limits.h>
773135f5 30#include <syslog.h>
af3396da 31#ifndef NO_LIBUDEV
49b69533 32#include <libudev.h>
af3396da 33#endif
52826846 34
50232a6e
MG
35#define EVENT_NAME_MAX 32
36
2e0172b1 37struct state {
84d969be
KT
38 char devname[MD_NAME_MAX + sizeof("/dev/md/")]; /* length of "/dev/md/" + device name + terminating byte*/
39 char devnm[MD_NAME_MAX]; /* to sync with mdstat info */
1d13b599 40 unsigned int utime;
2e0172b1
N
41 int err;
42 char *spare_group;
43 int active, working, failed, spare, raid;
f1661bd7 44 int from_config;
73ff0732 45 int from_auto;
2e0172b1 46 int expected_spares;
b0599bda
N
47 int devstate[MAX_DISKS];
48 dev_t devid[MAX_DISKS];
2e0172b1 49 int percent;
84d969be
KT
50 char parent_devnm[MD_NAME_MAX]; /* For subarray, devnm of parent.
51 * For others, ""
52 */
2e0172b1 53 struct supertype *metadata;
c3621c0a
ML
54 struct state *subarray;/* for a container it is a link to first subarray
55 * for a subarray it is a link to next subarray
56 * in the same container */
57 struct state *parent; /* for a subarray it is a link to its container
58 */
2e0172b1
N
59 struct state *next;
60};
61
e0bd6a96 62struct alert_info {
b3015166 63 char hostname[HOST_NAME_MAX];
e0bd6a96
N
64 char *mailaddr;
65 char *mailfrom;
66 char *alert_cmd;
67 int dosyslog;
b3015166
MG
68 int test;
69} info;
50232a6e
MG
70
/*
 * Events that can be reported through alert().
 * Numbering starts at 0; EVENT_UNKNOWN is the last enumerator and is the
 * value paired with the NULL terminator of events_map.
 */
enum event {
	EVENT_SPARE_ACTIVE,
	EVENT_NEW_ARRAY,
	EVENT_MOVE_SPARE,
	EVENT_TEST_MESSAGE,
	EVENT_REBUILD_STARTED,
	EVENT_REBUILD,
	EVENT_REBUILD_FINISHED,
	EVENT_SPARES_MISSING,
	EVENT_DEVICE_DISAPPEARED,
	EVENT_FAIL,
	EVENT_FAIL_SPARE,
	EVENT_DEGRADED_ARRAY,
	EVENT_UNKNOWN
};
86
87mapping_t events_map[] = {
88 {"SpareActive", EVENT_SPARE_ACTIVE},
89 {"NewArray", EVENT_NEW_ARRAY},
90 {"MoveSpare", EVENT_MOVE_SPARE},
91 {"TestMessage", EVENT_TEST_MESSAGE},
92 {"RebuildStarted", EVENT_REBUILD_STARTED},
93 {"Rebuild", EVENT_REBUILD},
94 {"RebuildFinished", EVENT_REBUILD_FINISHED},
95 {"SparesMissing", EVENT_SPARES_MISSING},
96 {"DeviceDisappeared", EVENT_DEVICE_DISAPPEARED},
97 {"Fail", EVENT_FAIL},
98 {"FailSpare", EVENT_FAIL_SPARE},
99 {"DegradedArray", EVENT_DEGRADED_ARRAY},
100 {NULL, EVENT_UNKNOWN}
101};
102
2e0172b1
N
103static int make_daemon(char *pidfile);
104static int check_one_sharer(int scan);
7f3b2d1d 105static void write_autorebuild_pid(void);
50232a6e 106static void alert(const enum event event_enum, const unsigned int progress, const char *dev, const char *disc);
b3015166
MG
107static int check_array(struct state *st, struct mdstat_ent *mdstat, int increments, char *prefer);
108static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist);
109static void try_spare_migration(struct state *statelist);
c3621c0a 110static void link_containers_with_subarrays(struct state *list);
55c10e4d 111static void free_statelist(struct state *statelist);
af3396da 112#ifndef NO_LIBUDEV
49b69533 113static int check_udev_activity(void);
af3396da 114#endif
2e0172b1 115
a655e550 116int Monitor(struct mddev_dev *devlist,
52826846 117 char *mailaddr, char *alert_cmd,
95c50205
N
118 struct context *c,
119 int daemonise, int oneshot,
120 int dosyslog, char *pidfile, int increments,
121 int share)
52826846
NB
122{
123 /*
124 * Every few seconds, scan every md device looking for changes
125 * When a change is found, log it, possibly run the alert command,
126 * and possibly send Email
127 *
128 * For each array, we record:
129 * Update time
130 * active/working/failed/spare drives
131 * State of each device.
e0d19036 132 * %rebuilt if rebuilding
52826846
NB
133 *
134 * If the update time changes, check out all the data again
135 * It is possible that we cannot get the state of each device
136 * due to bugs in the md kernel module.
e0d19036
NB
137 * We also read /proc/mdstat to get rebuild percent,
138 * and to get state on all active devices incase of kernel bug.
52826846 139 *
e0d19036
NB
140 * Events are:
141 * Fail
142 * An active device had Faulty set or Active/Sync removed
143 * FailSpare
144 * A spare device had Faulty set
145 * SpareActive
146 * An active device had a reverse transition
147 * RebuildStarted
148 * percent went from -1 to +ve
9a36a9b7
ZB
149 * RebuildNN
150 * percent went from below to not-below NN%
e0d19036
NB
151 * DeviceDisappeared
152 * Couldn't access a device which was previously visible
52826846
NB
153 *
154 * if we detect an array with active<raid and spare==0
155 * we look at other arrays that have same spare-group
156 * If we find one with active==raid and spare>0,
157 * and if we can get_disk_info and find a name
158 * Then we hot-remove and hot-add to the other array
159 *
f40ac0e7 160 * If devlist is NULL, then we can monitor everything if --scan
e0d19036
NB
161 * was given. We get an initial list from config file and add anything
162 * that appears in /proc/mdstat
52826846
NB
163 */
164
2e0172b1 165 struct state *statelist = NULL;
52826846 166 int finished = 0;
e0d19036 167 struct mdstat_ent *mdstat = NULL;
72362f18 168 char *mailfrom;
9f3dd454 169 struct mddev_ident *mdlist;
af3396da 170 int delay_for_event = c->delay;
e0d19036 171
f40ac0e7
BK
172 if (devlist && c->scan) {
173 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
174 return 1;
175 }
176
f5ff2988 177 if (!mailaddr)
8aec876d 178 mailaddr = conf_get_mailaddr();
4948b8f7 179
f5ff2988 180 if (!alert_cmd)
8aec876d 181 alert_cmd = conf_get_program();
f5ff2988
KT
182
183 mailfrom = conf_get_mailfrom();
184
95c50205 185 if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
e7b84f9d 186 pr_err("No mail address or alert command - not monitoring.\n");
d013a55e 187 return 1;
56eedc1a 188 }
f5ff2988
KT
189
190 if (c->verbose) {
191 pr_err("Monitor is started with delay %ds\n", c->delay);
192 if (mailaddr)
193 pr_err("Monitor using email address %s\n", mailaddr);
194 if (alert_cmd)
195 pr_err("Monitor using program %s\n", alert_cmd);
196 }
197
e0bd6a96
N
198 info.alert_cmd = alert_cmd;
199 info.mailaddr = mailaddr;
200 info.mailfrom = mailfrom;
201 info.dosyslog = dosyslog;
b3015166
MG
202 info.test = c->test;
203
204 if (gethostname(info.hostname, sizeof(info.hostname)) != 0) {
205 pr_err("Cannot get hostname.\n");
206 return 1;
207 }
208 info.hostname[sizeof(info.hostname) - 1] = '\0';
d013a55e 209
7f3b2d1d
BK
210 if (share){
211 if (check_one_sharer(c->scan))
212 return 1;
213 }
214
44d337f0
AC
215 if (daemonise) {
216 int rv = make_daemon(pidfile);
217 if (rv >= 0)
218 return rv;
219 }
e0d19036 220
1011e834 221 if (share)
7f3b2d1d 222 write_autorebuild_pid();
edde9560 223
e0d19036 224 if (devlist == NULL) {
9f3dd454 225 mdlist = conf_get_ident(NULL);
f566ef45 226 for (; mdlist; mdlist = mdlist->next) {
fe056d1f 227 struct state *st;
f566ef45 228
fe056d1f
N
229 if (mdlist->devname == NULL)
230 continue;
112cace6
N
231 if (strcasecmp(mdlist->devname, "<ignore>") == 0)
232 continue;
8b668d4a
LF
233 if (!is_mddev(mdlist->devname))
234 continue;
e702f392 235
503975b9 236 st = xcalloc(1, sizeof *st);
84d969be
KT
237 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"),
238 "/dev/md/%s", basename(mdlist->devname));
e0d19036 239 st->next = statelist;
4dd2df09 240 st->devnm[0] = 0;
9dad51d4 241 st->percent = RESYNC_UNKNOWN;
f1661bd7 242 st->from_config = 1;
feb716e9 243 st->expected_spares = mdlist->spare_disks;
e0d19036 244 if (mdlist->spare_group)
503975b9 245 st->spare_group = xstrdup(mdlist->spare_group);
e0d19036
NB
246 statelist = st;
247 }
248 } else {
a655e550 249 struct mddev_dev *dv;
f566ef45
JS
250
251 for (dv = devlist; dv; dv = dv->next) {
e702f392
KT
252 struct state *st;
253
8b668d4a
LF
254 if (!is_mddev(dv->devname))
255 continue;
e702f392
KT
256
257 st = xcalloc(1, sizeof *st);
9f3dd454 258 mdlist = conf_get_ident(dv->devname);
84d969be 259 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", dv->devname);
e0d19036 260 st->next = statelist;
4dd2df09 261 st->devnm[0] = 0;
9dad51d4 262 st->percent = RESYNC_UNKNOWN;
feb716e9 263 st->expected_spares = -1;
e5329c37
NB
264 if (mdlist) {
265 st->expected_spares = mdlist->spare_disks;
266 if (mdlist->spare_group)
503975b9 267 st->spare_group = xstrdup(mdlist->spare_group);
e5329c37 268 }
e0d19036
NB
269 statelist = st;
270 }
271 }
272
f566ef45 273 while (!finished) {
aa88f531 274 int new_found = 0;
73ff0732 275 struct state *st, **stp;
a90e1050 276 int anydegraded = 0;
007087d0 277 int anyredundant = 0;
e0d19036
NB
278
279 if (mdstat)
280 free_mdstat(mdstat);
f566ef45 281 mdstat = mdstat_read(oneshot ? 0 : 1, 0);
e0d19036 282
007087d0 283 for (st = statelist; st; st = st->next) {
b3015166 284 if (check_array(st, mdstat, increments, c->prefer))
a90e1050 285 anydegraded = 1;
007087d0
MT
286 /* for external arrays, metadata is filled for
287 * containers only
288 */
289 if (st->metadata && st->metadata->ss->external)
290 continue;
291 if (st->err == 0 && !anyredundant)
292 anyredundant = 1;
293 }
1011e834 294
e0d19036 295 /* now check if there are any new devices found in mdstat */
95c50205 296 if (c->scan)
b3015166 297 new_found = add_new_arrays(mdstat, &statelist);
2e0172b1 298
e0d19036
NB
299 /* If an array has active < raid && spare == 0 && spare_group != NULL
300 * Look for another array with spare > 0 and active == raid and same spare_group
49b69533 301 * if found, choose a device and hotremove/hotadd
e0d19036 302 */
a90e1050 303 if (share && anydegraded)
b3015166 304 try_spare_migration(statelist);
aa88f531
NB
305 if (!new_found) {
306 if (oneshot)
307 break;
007087d0 308 else if (!anyredundant) {
8c80d305 309 pr_err("No array with redundancy detected, stopping\n");
007087d0
MT
310 break;
311 }
e2308733 312 else {
af3396da 313#ifndef NO_LIBUDEV
cab9c67d 314 /*
af3396da
MT
315 * Wait for udevd to finish new devices
316 * processing.
cab9c67d 317 */
af3396da
MT
318 if (mdstat_wait(delay_for_event) &&
319 check_udev_activity())
49b69533 320 pr_err("Error while waiting for UDEV to complete new devices processing\n");
af3396da
MT
321#else
322 int wait_result = mdstat_wait(delay_for_event);
323 /*
324 * Give chance to process new device
325 */
326 if (wait_result != 0) {
327 if (c->delay > 5)
328 delay_for_event = 5;
329 } else
330 delay_for_event = c->delay;
331#endif
e2308733
MT
332 mdstat_close();
333 }
aa88f531 334 }
b3015166 335 info.test = 0;
73ff0732
N
336
337 for (stp = &statelist; (st = *stp) != NULL; ) {
338 if (st->from_auto && st->err > 5) {
339 *stp = st->next;
55c10e4d
PB
340 if (st->spare_group)
341 free(st->spare_group);
342
73ff0732
N
343 free(st);
344 } else
345 stp = &st->next;
346 }
52826846 347 }
55c10e4d
PB
348
349 free_statelist(statelist);
b657208c 350
b5e64645
NB
351 if (pidfile)
352 unlink(pidfile);
52826846
NB
353 return 0;
354}
355
2e0172b1
N
/*
 * make_daemon() - fork the monitor into the background.
 * @pidfile: file that receives the daemon's pid; when NULL the pid is
 *	     printed on stdout instead.
 *
 * Return:
 *   -1 in the forked daemon
 *    0 in the parent
 *    1 on error
 * so a non-negative becomes the exit code.
 */
static int make_daemon(char *pidfile)
{
	int pid = fork();

	if (pid < 0) {
		perror("daemonise");
		return 1;
	}

	if (pid > 0) {
		/* Parent: publish the child's pid, then let the caller exit. */
		if (!pidfile)
			printf("%d\n", pid);
		else {
			FILE *pid_file = NULL;
			int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC,
				      0644);

			if (fd >= 0)
				pid_file = fdopen(fd, "w");
			if (!pid_file) {
				/* report first so that errno is still intact */
				perror("cannot create pid file");
				/* open() worked but fdopen() did not */
				if (fd >= 0)
					close(fd);
			} else {
				fprintf(pid_file, "%d\n", pid);
				fclose(pid_file);
			}
		}
		return 0;
	}

	/* Child: becomes the daemon. */
	manage_fork_fds(0);
	setsid();
	return -1;
}
391
392static int check_one_sharer(int scan)
393{
185ec439
CL
394 int pid;
395 FILE *comm_fp;
2e0172b1 396 FILE *fp;
7f3b2d1d
BK
397 char comm_path[PATH_MAX];
398 char path[PATH_MAX];
185ec439
CL
399 char comm[20];
400
0011874f
JS
401 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
402 fp = fopen(path, "r");
2e0172b1 403 if (fp) {
71204a50
N
404 if (fscanf(fp, "%d", &pid) != 1)
405 pid = -1;
185ec439
CL
406 snprintf(comm_path, sizeof(comm_path),
407 "/proc/%d/comm", pid);
408 comm_fp = fopen(comm_path, "r");
409 if (comm_fp) {
1c66260d 410 if (fscanf(comm_fp, "%19s", comm) &&
185ec439
CL
411 strncmp(basename(comm), Name, strlen(Name)) == 0) {
412 if (scan) {
413 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
414 fclose(comm_fp);
415 fclose(fp);
416 return 1;
417 } else {
418 pr_err("Warning: One autorebuild process already running.\n");
419 }
2e0172b1 420 }
185ec439 421 fclose(comm_fp);
2e0172b1
N
422 }
423 fclose(fp);
424 }
7f3b2d1d
BK
425 return 0;
426}
427
428static void write_autorebuild_pid()
429{
430 char path[PATH_MAX];
431 int pid;
ca4b156b 432 FILE *fp = NULL;
7f3b2d1d
BK
433 sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
434
ca4b156b 435 if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
7f3b2d1d
BK
436 pr_err("Can't create autorebuild.pid file\n");
437 } else {
ca4b156b
MT
438 int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700);
439
440 if (fd >= 0)
441 fp = fdopen(fd, "w");
442
7f3b2d1d 443 if (!fp)
7a862a02 444 pr_err("Can't create autorebuild.pid file\n");
7f3b2d1d
BK
445 else {
446 pid = getpid();
447 fprintf(fp, "%d\n", pid);
448 fclose(fp);
2e0172b1
N
449 }
450 }
2e0172b1 451}
52826846 452
50232a6e 453static void execute_alert_cmd(const char *event_name, const char *dev, const char *disc)
36988671
MG
454{
455 int pid = fork();
456
457 switch (pid) {
458 default:
459 waitpid(pid, NULL, 0);
460 break;
461 case -1:
462 pr_err("Cannot fork to execute alert command");
463 break;
464 case 0:
50232a6e 465 execl(info.alert_cmd, info.alert_cmd, event_name, dev, disc, NULL);
36988671
MG
466 exit(2);
467 }
468}
469
50232a6e 470static void send_event_email(const char *event_name, const char *dev, const char *disc)
36988671
MG
471{
472 FILE *mp, *mdstat;
36988671
MG
473 char buf[BUFSIZ];
474 int n;
475
476 mp = popen(Sendmail, "w");
477 if (!mp) {
478 pr_err("Cannot open pipe stream for sendmail.\n");
479 return;
480 }
481
36988671 482 signal(SIGPIPE, SIG_IGN);
b3015166
MG
483 if (info.mailfrom)
484 fprintf(mp, "From: %s\n", info.mailfrom);
36988671
MG
485 else
486 fprintf(mp, "From: %s monitoring <root>\n", Name);
b3015166 487 fprintf(mp, "To: %s\n", info.mailaddr);
50232a6e 488 fprintf(mp, "Subject: %s event on %s:%s\n\n", event_name, dev, info.hostname);
36988671 489 fprintf(mp, "This is an automatically generated mail message. \n");
50232a6e 490 fprintf(mp, "A %s event had been detected on md device %s.\n\n", event_name, dev);
36988671
MG
491
492 if (disc && disc[0] != ' ')
493 fprintf(mp,
494 "It could be related to component device %s.\n\n", disc);
495 if (disc && disc[0] == ' ')
496 fprintf(mp, "Extra information:%s.\n\n", disc);
497
498 mdstat = fopen("/proc/mdstat", "r");
499 if (!mdstat) {
500 pr_err("Cannot open /proc/mdstat\n");
501 pclose(mp);
502 return;
503 }
504
505 fprintf(mp, "The /proc/mdstat file currently contains the following:\n\n");
506 while ((n = fread(buf, 1, sizeof(buf), mdstat)) > 0)
507 n = fwrite(buf, 1, n, mp);
508 fclose(mdstat);
509 pclose(mp);
510}
511
50232a6e 512static void log_event_to_syslog(const enum event event_enum, const char *event_name, const char *dev, const char *disc)
52826846 513{
773135f5 514 int priority;
36988671
MG
515 /* Log at a different severity depending on the event.
516 *
517 * These are the critical events: */
50232a6e
MG
518 if (event_enum == EVENT_FAIL ||
519 event_enum == EVENT_DEGRADED_ARRAY ||
520 event_enum == EVENT_DEVICE_DISAPPEARED)
36988671
MG
521 priority = LOG_CRIT;
522 /* Good to know about, but are not failures: */
50232a6e
MG
523 else if (event_enum == EVENT_REBUILD ||
524 event_enum == EVENT_MOVE_SPARE ||
525 event_enum == EVENT_SPARES_MISSING)
36988671
MG
526 priority = LOG_WARNING;
527 /* Everything else: */
528 else
529 priority = LOG_INFO;
530
531 if (disc && disc[0] != ' ')
532 syslog(priority,
50232a6e
MG
533 "%s event detected on md device %s, component device %s",
534 event_name, dev, disc);
36988671 535 else if (disc)
50232a6e 536 syslog(priority, "%s event detected on md device %s: %s", event_name, dev, disc);
36988671 537 else
50232a6e 538 syslog(priority, "%s event detected on md device %s", event_name, dev);
36988671 539}
773135f5 540
50232a6e 541static void alert(const enum event event_enum, const unsigned int progress, const char *dev, const char *disc)
36988671 542{
50232a6e 543 char event_name[EVENT_NAME_MAX];
aba69144 544
50232a6e
MG
545 if (event_enum == EVENT_REBUILD) {
546 snprintf(event_name, sizeof(event_name), "%s%02d",
547 map_num_s(events_map, EVENT_REBUILD), progress);
548 } else {
549 snprintf(event_name, sizeof(event_name), "%s", map_num_s(events_map, event_enum));
cd29a5c8 550 }
50232a6e 551
b3015166 552 if (info.alert_cmd)
50232a6e 553 execute_alert_cmd(event_name, dev, disc);
36988671 554
50232a6e
MG
555 if (info.mailaddr && (event_enum == EVENT_FAIL ||
556 event_enum == EVENT_TEST_MESSAGE ||
557 event_enum == EVENT_SPARES_MISSING ||
558 event_enum == EVENT_DEGRADED_ARRAY)) {
559 send_event_email(event_name, dev, disc);
52826846 560 }
773135f5 561
b3015166 562 if (info.dosyslog)
50232a6e 563 log_event_to_syslog(event_enum, event_name, dev, disc);
52826846 564}
b90c0e9a 565
a90e1050 566static int check_array(struct state *st, struct mdstat_ent *mdstat,
c2ecf5f6 567 int increments, char *prefer)
2e0172b1 568{
ff044d6b
AC
569 /* Update the state 'st' to reflect any changes shown in mdstat,
570 * or found by directly examining the array, and return
571 * '1' if the array is degraded, or '0' if it is optimal (or dead).
572 */
b3015166 573 struct { int state, major, minor; } disks_info[MAX_DISKS];
aed5f5c3 574 struct mdinfo *sra = NULL;
2e0172b1
N
575 mdu_array_info_t array;
576 struct mdstat_ent *mse = NULL, *mse2;
577 char *dev = st->devname;
b9a0309c 578 int fd;
2e0172b1 579 int i;
9e6d9291
N
580 int remaining_disks;
581 int last_disk;
721b662b 582 int new_array = 0;
1830e74b 583 int retval;
802961a2 584 int is_container = 0;
b3ab4e4d 585 unsigned long redundancy_only_flags = 0;
2e0172b1 586
b3015166 587 if (info.test)
50232a6e 588 alert(EVENT_TEST_MESSAGE, 0, dev, NULL);
b9a0309c 589
1830e74b
JS
590 retval = 0;
591
2e0172b1 592 fd = open(dev, O_RDONLY);
13e5d845
JS
593 if (fd < 0)
594 goto disappeared;
b9a0309c 595
802961a2 596 if (st->devnm[0] == 0)
84d969be 597 snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
802961a2
MT
598
599 for (mse2 = mdstat; mse2; mse2 = mse2->next)
600 if (strcmp(mse2->devnm, st->devnm) == 0) {
601 mse2->devnm[0] = 0; /* flag it as "used" */
602 mse = mse2;
603 }
604
605 if (!mse) {
606 /* duplicated array in statelist
607 * or re-created after reading mdstat
608 */
609 st->err++;
610 goto out;
611 }
612
613 if (mse->level == NULL)
614 is_container = 1;
615
2dab69c9 616 if (!is_container && !md_array_active(fd))
13e5d845 617 goto disappeared;
b9a0309c 618
2e0172b1 619 fcntl(fd, F_SETFD, FD_CLOEXEC);
13e5d845
JS
620 if (md_get_array_info(fd, &array) < 0)
621 goto disappeared;
622
b3ab4e4d
MT
623 if (!is_container && map_name(pers, mse->level) > 0)
624 redundancy_only_flags |= GET_MISMATCH;
802961a2
MT
625
626 sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
b3ab4e4d 627 GET_STATE | redundancy_only_flags);
aed5f5c3 628
aed5f5c3
JS
629 if (!sra)
630 goto disappeared;
631
2e0172b1
N
632 /* It's much easier to list what array levels can't
633 * have a device disappear than all of them that can
634 */
48bc2ade 635 if (sra->array.level == 0 || sra->array.level == -1) {
f1661bd7 636 if (!st->err && !st->from_config)
50232a6e 637 alert(EVENT_DEVICE_DISAPPEARED, 0, dev, " Wrong-Level");
73ff0732 638 st->err++;
1830e74b 639 goto out;
2e0172b1 640 }
2e0172b1 641
2e0172b1
N
642 /* this array is in /proc/mdstat */
643 if (array.utime == 0)
644 /* external arrays don't update utime, so
645 * just make sure it is always different. */
646 array.utime = st->utime + 1;;
647
0f760384 648 if (st->err) {
73ff0732 649 /* New array appeared where previously had an error */
0f760384
N
650 st->err = 0;
651 st->percent = RESYNC_NONE;
721b662b 652 new_array = 1;
007087d0 653 if (!is_container)
50232a6e 654 alert(EVENT_NEW_ARRAY, 0, st->devname, NULL);
0f760384
N
655 }
656
b8e5713c 657 if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
e5eb6857 658 st->working == sra->array.working_disks &&
b98943a4 659 st->spare == sra->array.spare_disks &&
d7be7d87 660 (mse == NULL || (mse->percent == st->percent))) {
ff044d6b 661 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
662 retval = 1;
663 goto out;
2e0172b1
N
664 }
665 if (st->utime == 0 && /* new array */
f27904a5 666 mse->pattern && strchr(mse->pattern, '_') /* degraded */)
50232a6e 667 alert(EVENT_DEGRADED_ARRAY, 0, dev, NULL);
2e0172b1 668
b98943a4
JS
669 if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
670 sra->array.spare_disks < st->expected_spares)
50232a6e 671 alert(EVENT_SPARES_MISSING, 0, dev, NULL);
9dad51d4 672 if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
2e0172b1 673 mse->percent >= 0)
50232a6e 674 alert(EVENT_REBUILD_STARTED, 0, dev, NULL);
f27904a5 675 if (st->percent >= 0 && mse->percent >= 0 &&
2e0172b1 676 (mse->percent / increments) > (st->percent / increments)) {
2e0172b1 677 if((mse->percent / increments) == 0)
50232a6e 678 alert(EVENT_REBUILD_STARTED, 0, dev, NULL);
2e0172b1 679 else
50232a6e 680 alert(EVENT_REBUILD, mse->percent, dev, NULL);
2e0172b1
N
681 }
682
f27904a5 683 if (mse->percent == RESYNC_NONE && st->percent >= 0) {
2e0172b1
N
684 /* Rebuild/sync/whatever just finished.
685 * If there is a number in /mismatch_cnt,
686 * we should report that.
687 */
2e0172b1 688 if (sra && sra->mismatch_cnt > 0) {
8453f8d0
LD
689 char cnt[80];
690 snprintf(cnt, sizeof(cnt),
691 " mismatches found: %d (on raid level %d)",
f566ef45 692 sra->mismatch_cnt, sra->array.level);
50232a6e 693 alert(EVENT_REBUILD_FINISHED, 0, dev, cnt);
2e0172b1 694 } else
50232a6e 695 alert(EVENT_REBUILD_FINISHED, 0, dev, NULL);
2e0172b1
N
696 }
697 st->percent = mse->percent;
698
b98943a4 699 remaining_disks = sra->array.nr_disks;
f27904a5 700 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
2e0172b1
N
701 mdu_disk_info_t disc;
702 disc.number = i;
d97572f5 703 if (md_get_disk_info(fd, &disc) >= 0) {
b3015166
MG
704 disks_info[i].state = disc.state;
705 disks_info[i].major = disc.major;
706 disks_info[i].minor = disc.minor;
9e6d9291
N
707 if (disc.major || disc.minor)
708 remaining_disks --;
2e0172b1 709 } else
b3015166 710 disks_info[i].major = disks_info[i].minor = 0;
2e0172b1 711 }
9e6d9291 712 last_disk = i;
2e0172b1 713
4019ad07
JL
714 if (mse->metadata_version &&
715 strncmp(mse->metadata_version, "external:", 9) == 0 &&
4dd2df09
N
716 is_subarray(mse->metadata_version+9)) {
717 char *sl;
84d969be 718 snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
4dd2df09
N
719 sl = strchr(st->parent_devnm, '/');
720 if (sl)
721 *sl = 0;
722 } else
723 st->parent_devnm[0] = 0;
f27904a5 724 if (st->metadata == NULL && st->parent_devnm[0] == 0)
2e0172b1
N
725 st->metadata = super_by_fd(fd, NULL);
726
f566ef45
JS
727 for (i = 0; i < MAX_DISKS; i++) {
728 mdu_disk_info_t disc = {0, 0, 0, 0, 0};
729 int newstate = 0;
2e0172b1
N
730 int change;
731 char *dv = NULL;
732 disc.number = i;
b3015166
MG
733 if (i < last_disk && (disks_info[i].major || disks_info[i].minor)) {
734 newstate = disks_info[i].state;
735 dv = map_dev_preferred(disks_info[i].major, disks_info[i].minor, 1,
f27904a5 736 prefer);
2e0172b1 737 disc.state = newstate;
b3015166
MG
738 disc.major = disks_info[i].major;
739 disc.minor = disks_info[i].minor;
721b662b
N
740 } else
741 newstate = (1 << MD_DISK_REMOVED);
742
2e0172b1 743 if (dv == NULL && st->devid[i])
f27904a5
JS
744 dv = map_dev_preferred(major(st->devid[i]),
745 minor(st->devid[i]), 1, prefer);
2e0172b1 746 change = newstate ^ st->devstate[i];
721b662b 747 if (st->utime && change && !st->err && !new_array) {
f27904a5 748 if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
50232a6e 749 alert(EVENT_FAIL, 0, dev, dv);
f27904a5 750 else if ((newstate & (1 << MD_DISK_FAULTY)) &&
2e0172b1 751 (disc.major || disc.minor) &&
f27904a5
JS
752 st->devid[i] == makedev(disc.major,
753 disc.minor))
50232a6e 754 alert(EVENT_FAIL_SPARE, 0, dev, dv);
f27904a5 755 else if ((newstate&change) & (1 << MD_DISK_SYNC))
50232a6e 756 alert(EVENT_SPARE_ACTIVE, 0, dev, dv);
2e0172b1
N
757 }
758 st->devstate[i] = newstate;
759 st->devid[i] = makedev(disc.major, disc.minor);
760 }
b98943a4 761 st->active = sra->array.active_disks;
e5eb6857 762 st->working = sra->array.working_disks;
b98943a4 763 st->spare = sra->array.spare_disks;
b8e5713c 764 st->failed = sra->array.failed_disks;
2e0172b1 765 st->utime = array.utime;
12a9d21f 766 st->raid = sra->array.raid_disks;
2e0172b1 767 st->err = 0;
a90e1050 768 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
769 retval = 1;
770
771 out:
aed5f5c3
JS
772 if (sra)
773 sysfs_free(sra);
802961a2 774 if (fd >= 0)
13e5d845 775 close(fd);
1830e74b 776 return retval;
13e5d845
JS
777
778 disappeared:
007087d0 779 if (!st->err && !is_container)
50232a6e 780 alert(EVENT_DEVICE_DISAPPEARED, 0, dev, NULL);
13e5d845
JS
781 st->err++;
782 goto out;
2e0172b1
N
783}
784
b3015166 785static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
2e0172b1
N
786{
787 struct mdstat_ent *mse;
788 int new_found = 0;
1e08717f 789 char *name;
2e0172b1 790
f566ef45 791 for (mse = mdstat; mse; mse = mse->next)
d7be7d87 792 if (mse->devnm[0] && (!mse->level || /* retrieve containers */
f566ef45
JS
793 (strcmp(mse->level, "raid0") != 0 &&
794 strcmp(mse->level, "linear") != 0))) {
503975b9 795 struct state *st = xcalloc(1, sizeof *st);
2e0172b1
N
796 mdu_array_info_t array;
797 int fd;
1e08717f
SV
798
799 name = get_md_name(mse->devnm);
800 if (!name) {
801 free(st);
802 continue;
803 }
804
84d969be 805 snprintf(st->devname, MD_NAME_MAX + sizeof("/dev/md/"), "%s", name);
2e0172b1 806 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
9cd39f01 807 md_get_array_info(fd, &array) < 0) {
2e0172b1 808 /* no such array */
9cd39f01
JS
809 if (fd >= 0)
810 close(fd);
2e0172b1 811 put_md_name(st->devname);
2e0172b1
N
812 if (st->metadata) {
813 st->metadata->ss->free_super(st->metadata);
814 free(st->metadata);
815 }
816 free(st);
817 continue;
818 }
819 close(fd);
83f3bc5f 820 st->next = *statelist;
2e0172b1 821 st->err = 1;
73ff0732 822 st->from_auto = 1;
84d969be 823 snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
9dad51d4 824 st->percent = RESYNC_UNKNOWN;
2e0172b1 825 st->expected_spares = -1;
eb28e119 826 if (mse->metadata_version &&
f566ef45
JS
827 strncmp(mse->metadata_version,
828 "external:", 9) == 0 &&
4dd2df09
N
829 is_subarray(mse->metadata_version+9)) {
830 char *sl;
84d969be
KT
831 snprintf(st->parent_devnm, MD_NAME_MAX,
832 "%s", mse->metadata_version + 10);
4dd2df09
N
833 sl = strchr(st->parent_devnm, '/');
834 *sl = 0;
835 } else
836 st->parent_devnm[0] = 0;
83f3bc5f 837 *statelist = st;
b3015166 838 if (info.test)
50232a6e 839 alert(EVENT_TEST_MESSAGE, 0, st->devname, NULL);
2e0172b1
N
840 new_found = 1;
841 }
842 return new_found;
843}
844
fbfdcb06
AO
845static int get_required_spare_criteria(struct state *st,
846 struct spare_criteria *sc)
80e7f8c3
AC
847{
848 int fd;
80e7f8c3 849
f566ef45 850 if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
fbfdcb06 851 sc->min_size = 0;
4b57ecf6 852 sc->sector_size = 0;
de697acc 853 return 0;
300f5033 854 }
80e7f8c3
AC
855
856 fd = open(st->devname, O_RDONLY);
857 if (fd < 0)
de697acc
AC
858 return 1;
859 if (st->metadata->ss->external)
860 st->metadata->ss->load_container(st->metadata, fd, st->devname);
861 else
862 st->metadata->ss->load_super(st->metadata, fd, st->devname);
80e7f8c3 863 close(fd);
de697acc
AC
864 if (!st->metadata->sb)
865 return 1;
fbfdcb06
AO
866
867 st->metadata->ss->get_spare_criteria(st->metadata, sc);
80e7f8c3
AC
868 st->metadata->ss->free_super(st->metadata);
869
de697acc 870 return 0;
80e7f8c3
AC
871}
872
5ec0f373 873static int check_donor(struct state *from, struct state *to)
2feb22ef 874{
66f5c4b6
N
875 struct state *sub;
876
2feb22ef
N
877 if (from == to)
878 return 0;
66f5c4b6
N
879 if (from->parent)
880 /* Cannot move from a member */
2feb22ef 881 return 0;
ff044d6b
AC
882 if (from->err)
883 return 0;
66f5c4b6
N
884 for (sub = from->subarray; sub; sub = sub->subarray)
885 /* If source array has degraded subarrays, don't
886 * remove anything
887 */
888 if (sub->active < sub->raid)
889 return 0;
890 if (from->metadata->ss->external == 0)
891 if (from->active < from->raid)
892 return 0;
2feb22ef
N
893 if (from->spare <= 0)
894 return 0;
e78dda3b 895 return 1;
2feb22ef
N
896}
897
0f0749ad 898static dev_t choose_spare(struct state *from, struct state *to,
f566ef45 899 struct domainlist *domlist, struct spare_criteria *sc)
0fa21e85
N
900{
901 int d;
0f0749ad 902 dev_t dev = 0;
0fa21e85 903
b0599bda 904 for (d = from->raid; !dev && d < MAX_DISKS; d++) {
f566ef45 905 if (from->devid[d] > 0 && from->devstate[d] == 0) {
0fa21e85
N
906 struct dev_policy *pol;
907 unsigned long long dev_size;
4b57ecf6 908 unsigned int dev_sector_size;
0fa21e85 909
bfd76b93
CA
910 if (to->metadata->ss->external &&
911 test_partition_from_id(from->devid[d]))
912 continue;
913
fbfdcb06 914 if (sc->min_size &&
0fa21e85 915 dev_size_from_id(from->devid[d], &dev_size) &&
fbfdcb06 916 dev_size < sc->min_size)
0fa21e85
N
917 continue;
918
4b57ecf6
AO
919 if (sc->sector_size &&
920 dev_sector_size_from_id(from->devid[d],
921 &dev_sector_size) &&
922 sc->sector_size != dev_sector_size)
923 continue;
924
4dd2df09 925 pol = devid_policy(from->devid[d]);
0fa21e85
N
926 if (from->spare_group)
927 pol_add(&pol, pol_domain,
928 from->spare_group, NULL);
f566ef45
JS
929 if (domain_test(domlist, pol,
930 to->metadata->ss->name) == 1)
0fa21e85
N
931 dev = from->devid[d];
932 dev_policy_free(pol);
933 }
934 }
935 return dev;
936}
937
0f0749ad 938static dev_t container_choose_spare(struct state *from, struct state *to,
f0b85306 939 struct domainlist *domlist,
fbfdcb06 940 struct spare_criteria *sc, int active)
5739e0d0
N
941{
942 /* This is similar to choose_spare, but we cannot trust devstate,
943 * so we need to read the metadata instead
944 */
326727d9 945 struct mdinfo *list;
5739e0d0 946 struct supertype *st = from->metadata;
ff044d6b 947 int fd = open(from->devname, O_RDONLY);
5739e0d0 948 int err;
0f0749ad 949 dev_t dev = 0;
5739e0d0
N
950
951 if (fd < 0)
952 return 0;
326727d9
AC
953 if (!st->ss->getinfo_super_disks) {
954 close(fd);
5739e0d0 955 return 0;
326727d9 956 }
1011e834 957
5739e0d0
N
958 err = st->ss->load_container(st, fd, NULL);
959 close(fd);
960 if (err)
961 return 0;
1011e834 962
a1e49d69
CA
963 if (from == to) {
964 /* We must check if number of active disks has not increased
965 * since ioctl in main loop. mdmon may have added spare
966 * to subarray. If so we do not need to look for more spares
967 * so return non zero value */
968 int active_cnt = 0;
969 struct mdinfo *dp;
970 list = st->ss->getinfo_super_disks(st);
971 if (!list) {
972 st->ss->free_super(st);
973 return 1;
974 }
975 dp = list->devs;
976 while (dp) {
f566ef45
JS
977 if (dp->disk.state & (1 << MD_DISK_SYNC) &&
978 !(dp->disk.state & (1 << MD_DISK_FAULTY)))
a1e49d69
CA
979 active_cnt++;
980 dp = dp->next;
981 }
982 sysfs_free(list);
983 if (active < active_cnt) {
984 /* Spare just activated.*/
985 st->ss->free_super(st);
986 return 1;
987 }
988 }
989
326727d9 990 /* We only need one spare so full list not needed */
fbfdcb06 991 list = container_choose_spares(st, sc, domlist, from->spare_group,
326727d9
AC
992 to->metadata->ss->name, 1);
993 if (list) {
994 struct mdinfo *disks = list->devs;
995 if (disks)
996 dev = makedev(disks->disk.major, disks->disk.minor);
997 sysfs_free(list);
5739e0d0 998 }
326727d9 999 st->ss->free_super(st);
5739e0d0
N
1000 return dev;
1001}
1002
b3015166 1003static void try_spare_migration(struct state *statelist)
2e0172b1 1004{
66f5c4b6
N
1005 struct state *from;
1006 struct state *st;
fbfdcb06 1007 struct spare_criteria sc;
c3621c0a
ML
1008
1009 link_containers_with_subarrays(statelist);
66f5c4b6 1010 for (st = statelist; st; st = st->next)
f566ef45 1011 if (st->active < st->raid && st->spare == 0 && !st->err) {
e78dda3b
N
1012 struct domainlist *domlist = NULL;
1013 int d;
66f5c4b6
N
1014 struct state *to = st;
1015
4dd2df09 1016 if (to->parent_devnm[0] && !to->parent)
c0dc0ad5
CA
1017 /* subarray monitored without parent container
1018 * we can't move spares here */
1019 continue;
1011e834 1020
66f5c4b6
N
1021 if (to->parent)
1022 /* member of a container */
1023 to = to->parent;
e78dda3b 1024
fbfdcb06 1025 if (get_required_spare_criteria(to, &sc))
de697acc 1026 continue;
e9a2ac02
AC
1027 if (to->metadata->ss->external) {
1028 /* We must make sure there is
1029 * no suitable spare in container already.
1030 * If there is we don't add more */
1031 dev_t devid = container_choose_spare(
fbfdcb06 1032 to, to, NULL, &sc, st->active);
e9a2ac02
AC
1033 if (devid > 0)
1034 continue;
1035 }
b0599bda 1036 for (d = 0; d < MAX_DISKS; d++)
e78dda3b
N
1037 if (to->devid[d])
1038 domainlist_add_dev(&domlist,
1039 to->devid[d],
1040 to->metadata->ss->name);
1041 if (to->spare_group)
1042 domain_add(&domlist, to->spare_group);
5ec0f373
ML
1043 /*
1044 * No spare migration if the destination
1045 * has no domain. Skip this array.
1046 */
1047 if (!domlist)
1048 continue;
0fa21e85 1049 for (from=statelist ; from ; from=from->next) {
0f0749ad 1050 dev_t devid;
5ec0f373 1051 if (!check_donor(from, to))
0fa21e85 1052 continue;
5739e0d0
N
1053 if (from->metadata->ss->external)
1054 devid = container_choose_spare(
fbfdcb06 1055 from, to, domlist, &sc, 0);
5739e0d0 1056 else
f0b85306 1057 devid = choose_spare(from, to, domlist,
fbfdcb06 1058 &sc);
f566ef45
JS
1059 if (devid > 0 &&
1060 move_spare(from->devname, to->devname,
1061 devid)) {
50232a6e 1062 alert(EVENT_MOVE_SPARE, 0, to->devname, from->devname);
d52bb542
AC
1063 break;
1064 }
0fa21e85 1065 }
e78dda3b 1066 domain_free(domlist);
2e0172b1
N
1067 }
1068}
c3621c0a
ML
1069
1070/* search the statelist to connect external
1071 * metadata subarrays with their containers
1072 * We always completely rebuild the tree from scratch as
1073 * that is safest considering the possibility of entries
1074 * disappearing or changing.
1075 */
1076static void link_containers_with_subarrays(struct state *list)
1077{
1078 struct state *st;
1079 struct state *cont;
1080 for (st = list; st; st = st->next) {
1081 st->parent = NULL;
1082 st->subarray = NULL;
1083 }
1084 for (st = list; st; st = st->next)
4dd2df09 1085 if (st->parent_devnm[0])
c3621c0a 1086 for (cont = list; cont; cont = cont->next)
f566ef45 1087 if (!cont->err && cont->parent_devnm[0] == 0 &&
4dd2df09 1088 strcmp(cont->devnm, st->parent_devnm) == 0) {
c3621c0a
ML
1089 st->parent = cont;
1090 st->subarray = cont->subarray;
1091 cont->subarray = st;
1092 break;
1093 }
1094}
1095
55c10e4d
PB
1096/**
1097 * free_statelist() - Frees statelist.
1098 * @statelist: statelist to free
1099 */
1100static void free_statelist(struct state *statelist)
1101{
1102 struct state *tmp = NULL;
1103
1104 while (statelist) {
1105 if (statelist->spare_group)
1106 free(statelist->spare_group);
1107
1108 tmp = statelist;
1109 statelist = statelist->next;
1110 free(tmp);
1111 }
1112}
1113
af3396da 1114#ifndef NO_LIBUDEV
49b69533
OS
/* function: check_udev_activity
 * Description: Waits for udev to finish processing its event queue,
 *		polling once per second.
 * Returns:
 *		1 - error while creating the udev context or queue
 *		2 - timeout, the queue was still busy after the retry budget
 *		0 - successful completion (also when udev is not in use)
 */
static int check_udev_activity(void)
{
	struct udev_queue *queue;
	struct udev *ctx;
	int retries_left;
	int rc = 0;

	/*
	 * In rare cases the system may not have udev;
	 * there is nothing to wait for then.
	 */
	if (!use_udev())
		return 0;

	ctx = udev_new();
	if (!ctx)
		return 1;

	queue = udev_queue_new(ctx);
	if (!queue) {
		udev_unref(ctx);
		return 1;
	}

	if (!udev_queue_get_queue_is_empty(queue)) {
		for (retries_left = 30;
		     !udev_queue_get_queue_is_empty(queue);
		     retries_left--) {
			sleep(1);

			if (retries_left == 0) {
				rc = 2;
				break;
			}
		}
	}

	udev_queue_unref(queue);
	udev_unref(ctx);
	return rc;
}
af3396da 1170#endif
49b69533 1171
b90c0e9a
NB
1172/* Not really Monitor but ... */
1173int Wait(char *dev)
1174{
4dd2df09 1175 char devnm[32];
cb91230c
TM
1176 dev_t rdev;
1177 char *tmp;
b90c0e9a 1178 int rv = 1;
d3f6cf4f 1179 int frozen_remaining = 3;
b90c0e9a 1180
cb91230c 1181 if (!stat_is_blkdev(dev, &rdev))
b90c0e9a 1182 return 2;
cb91230c
TM
1183
1184 tmp = devid2devnm(rdev);
1185 if (!tmp) {
1186 pr_err("Cannot get md device name.\n");
1187 return 2;
1188 }
1189
1190 strcpy(devnm, tmp);
b90c0e9a
NB
1191
1192 while(1) {
1193 struct mdstat_ent *ms = mdstat_read(1, 0);
1194 struct mdstat_ent *e;
1195
f566ef45 1196 for (e = ms; e; e = e->next)
4dd2df09 1197 if (strcmp(e->devnm, devnm) == 0)
b90c0e9a
NB
1198 break;
1199
d3f6cf4f 1200 if (e && e->percent == RESYNC_NONE) {
276be514
N
1201 /* We could be in the brief pause before something
1202 * starts. /proc/mdstat doesn't show that, but
1203 * sync_action does.
1204 */
1205 struct mdinfo mdi;
1206 char buf[21];
dae13137
JS
1207
1208 if (sysfs_init(&mdi, -1, devnm))
1209 return 2;
276be514
N
1210 if (sysfs_get_str(&mdi, NULL, "sync_action",
1211 buf, 20) > 0 &&
d3f6cf4f 1212 strcmp(buf,"idle\n") != 0) {
276be514 1213 e->percent = RESYNC_UNKNOWN;
d3f6cf4f
N
1214 if (strcmp(buf, "frozen\n") == 0) {
1215 if (frozen_remaining == 0)
1216 e->percent = RESYNC_NONE;
1217 else
1218 frozen_remaining -= 1;
1219 }
1220 }
276be514 1221 }
639c3c10 1222 if (!e || e->percent == RESYNC_NONE) {
e7783ee6 1223 if (e && e->metadata_version &&
c94709e8
DW
1224 strncmp(e->metadata_version, "external:", 9) == 0) {
1225 if (is_subarray(&e->metadata_version[9]))
1226 ping_monitor(&e->metadata_version[9]);
1227 else
4dd2df09 1228 ping_monitor(devnm);
c94709e8 1229 }
b90c0e9a
NB
1230 free_mdstat(ms);
1231 return rv;
1232 }
89a10d84 1233 free_mdstat(ms);
b90c0e9a
NB
1234 rv = 0;
1235 mdstat_wait(5);
1236 }
1237}
679eb882 1238
43ebc910
GP
/*
 * array_state values that WaitClean() accepts as "no longer dirty".
 * "broken" is used only for RAID0/LINEAR - it is the same as "clean",
 * but reported when the array has one or more members missing.
 * The list is NULL terminated.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
679eb882 1244
1ea04629 1245int WaitClean(char *dev, int verbose)
679eb882
N
1246{
1247 int fd;
1248 struct mdinfo *mdi;
1249 int rv = 1;
4dd2df09 1250 char devnm[32];
679eb882 1251
9e04ac1c
ZL
1252 if (!stat_is_blkdev(dev, NULL))
1253 return 2;
679eb882
N
1254 fd = open(dev, O_RDONLY);
1255 if (fd < 0) {
1256 if (verbose)
e7b84f9d 1257 pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
679eb882
N
1258 return 1;
1259 }
1260
4dd2df09
N
1261 strcpy(devnm, fd2devnm(fd));
1262 mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
679eb882
N
1263 if (!mdi) {
1264 if (verbose)
7a862a02 1265 pr_err("Failed to read sysfs attributes for %s\n", dev);
679eb882
N
1266 close(fd);
1267 return 0;
1268 }
1269
1270 switch(mdi->array.level) {
1271 case LEVEL_LINEAR:
1272 case LEVEL_MULTIPATH:
1273 case 0:
1274 /* safemode delay is irrelevant for these levels */
1275 rv = 0;
1276 }
1277
1278 /* for internal metadata the kernel handles the final clean
1279 * transition, containers can never be dirty
1280 */
1281 if (!is_subarray(mdi->text_version))
1282 rv = 0;
1283
1284 /* safemode disabled ? */
1285 if (mdi->safe_mode_delay == 0)
1286 rv = 0;
1287
1288 if (rv) {
4dd2df09 1289 int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
679eb882 1290 char buf[20];
efc67e8e 1291 int delay = 5000;
679eb882
N
1292
1293 /* minimize the safe_mode_delay and prepare to wait up to 5s
1294 * for writes to quiesce
1295 */
1296 sysfs_set_safemode(mdi, 1);
679eb882
N
1297
1298 /* wait for array_state to be clean */
1299 while (1) {
1300 rv = read(state_fd, buf, sizeof(buf));
1301 if (rv < 0)
1302 break;
43ebc910
GP
1303 if (sysfs_match_word(buf, clean_states) <
1304 (int)ARRAY_SIZE(clean_states) - 1)
679eb882 1305 break;
efc67e8e 1306 rv = sysfs_wait(state_fd, &delay);
679eb882
N
1307 if (rv < 0 && errno != EINTR)
1308 break;
1309 lseek(state_fd, 0, SEEK_SET);
1310 }
1311 if (rv < 0)
1312 rv = 1;
1ea04629 1313 else if (ping_monitor(mdi->text_version) == 0) {
679eb882
N
1314 /* we need to ping to close the window between array
1315 * state transitioning to clean and the metadata being
1316 * marked clean
1317 */
1318 rv = 0;
1ea04629 1319 } else {
679eb882 1320 rv = 1;
1ea04629
MT
1321 pr_err("Error connecting monitor with %s\n", dev);
1322 }
679eb882 1323 if (rv && verbose)
f566ef45 1324 pr_err("Error waiting for %s to be clean\n", dev);
679eb882
N
1325
1326 /* restore the original safe_mode_delay */
1327 sysfs_set_safemode(mdi, mdi->safe_mode_delay);
1328 close(state_fd);
1329 }
1330
1331 sysfs_free(mdi);
1332 close(fd);
1333
1334 return rv;
1335}