]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Monitor.c
Create.c: fix uclibc build
[thirdparty/mdadm.git] / Monitor.c
CommitLineData
52826846 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
52826846 3 *
e736b623 4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
52826846
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
52826846
NB
23 */
24
9a9dab36 25#include "mdadm.h"
9935cf0f 26#include "udev.h"
52826846
NB
27#include "md_p.h"
28#include "md_u.h"
e0d19036 29#include <sys/wait.h>
4450e59f 30#include <limits.h>
773135f5 31#include <syslog.h>
52826846 32
0a07dea8 33#define TASK_COMM_LEN 16
50232a6e 34#define EVENT_NAME_MAX 32
b6a84d4e 35#define AUTOREBUILD_PID_PATH MDMON_DIR "/autorebuild.pid"
9935cf0f 36#define FALLBACK_DELAY 5
50232a6e 37
b9ce7ab0
MT
38/**
39 * struct state - external array or container properties.
40 * @devname: has length of %DEV_MD_DIR + device name + terminating byte
41 * @devnm: to sync with mdstat info
42 * @parent_devnm: or subarray, devnm of parent, for others, ""
43 * @subarray: for a container it is a link to first subarray, for a subarray it is a link to next
44 * subarray in the same container
45 * @parent: for a subarray it is a link to its container
46 */
2e0172b1 47struct state {
b9ce7ab0
MT
48 char devname[MD_NAME_MAX + sizeof(DEV_MD_DIR)];
49 char devnm[MD_NAME_MAX];
1d13b599 50 unsigned int utime;
2e0172b1
N
51 int err;
52 char *spare_group;
53 int active, working, failed, spare, raid;
f1661bd7 54 int from_config;
73ff0732 55 int from_auto;
2e0172b1 56 int expected_spares;
b0599bda
N
57 int devstate[MAX_DISKS];
58 dev_t devid[MAX_DISKS];
2e0172b1 59 int percent;
b9ce7ab0 60 char parent_devnm[MD_NAME_MAX];
2e0172b1 61 struct supertype *metadata;
b9ce7ab0
MT
62 struct state *subarray;
63 struct state *parent;
2e0172b1
N
64 struct state *next;
65};
66
e0bd6a96 67struct alert_info {
b3015166 68 char hostname[HOST_NAME_MAX];
e0bd6a96
N
69 char *mailaddr;
70 char *mailfrom;
71 char *alert_cmd;
72 int dosyslog;
b3015166
MG
73 int test;
74} info;
50232a6e
MG
75
/*
 * enum event - events reported by the monitor.
 *
 * The order matters: the two __SYSLOG_PRIORITY_* entries are not events but
 * separators. get_syslog_event_priority() reports LOG_INFO for everything
 * below __SYSLOG_PRIORITY_WARNING, LOG_WARNING for everything between the two
 * separators and LOG_CRIT for everything above __SYSLOG_PRIORITY_CRITICAL.
 */
enum event {
	/* informational events */
	EVENT_SPARE_ACTIVE = 0,
	EVENT_NEW_ARRAY,
	EVENT_MOVE_SPARE,
	EVENT_TEST_MESSAGE,

	__SYSLOG_PRIORITY_WARNING,
	/* warning events */
	EVENT_REBUILD_STARTED,
	EVENT_REBUILD,
	EVENT_REBUILD_FINISHED,
	EVENT_SPARES_MISSING,

	__SYSLOG_PRIORITY_CRITICAL,
	/* critical events */
	EVENT_DEVICE_DISAPPEARED,
	EVENT_FAIL,
	EVENT_FAIL_SPARE,
	EVENT_DEGRADED_ARRAY,

	/* terminator of events_map */
	EVENT_UNKNOWN
};
93
94mapping_t events_map[] = {
95 {"SpareActive", EVENT_SPARE_ACTIVE},
96 {"NewArray", EVENT_NEW_ARRAY},
97 {"MoveSpare", EVENT_MOVE_SPARE},
98 {"TestMessage", EVENT_TEST_MESSAGE},
99 {"RebuildStarted", EVENT_REBUILD_STARTED},
100 {"Rebuild", EVENT_REBUILD},
101 {"RebuildFinished", EVENT_REBUILD_FINISHED},
102 {"SparesMissing", EVENT_SPARES_MISSING},
103 {"DeviceDisappeared", EVENT_DEVICE_DISAPPEARED},
104 {"Fail", EVENT_FAIL},
105 {"FailSpare", EVENT_FAIL_SPARE},
106 {"DegradedArray", EVENT_DEGRADED_ARRAY},
107 {NULL, EVENT_UNKNOWN}
108};
109
cc3df167
MG
110struct event_data {
111 enum event event_enum;
112 /*
113 * @event_name: Rebuild event name must be in form "RebuildXX", where XX is rebuild progress.
114 */
115 char event_name[EVENT_NAME_MAX];
116 char message[BUFSIZ];
117 const char *description;
118 const char *dev;
119 const char *disc;
120};
121
b3015166
MG
/*
 * Forward declarations of file-local helpers.
 * Each helper is declared exactly once; the list is kept in alphabetical order.
 */
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist);
static int check_array(struct state *st, struct mdstat_ent *mdstat, int increments, char *prefer);
static int check_one_sharer(int scan);
static void free_statelist(struct state *statelist);
static void link_containers_with_subarrays(struct state *list);
static int make_daemon(char *pidfile);
static void try_spare_migration(struct state *statelist);
static void wait_for_events(int *delay_for_event, int c_delay);
static void wait_for_events_mdstat(int *delay_for_event, int c_delay);
static int write_autorebuild_pid(void);
2e0172b1 134
a655e550 135int Monitor(struct mddev_dev *devlist,
52826846 136 char *mailaddr, char *alert_cmd,
95c50205
N
137 struct context *c,
138 int daemonise, int oneshot,
139 int dosyslog, char *pidfile, int increments,
140 int share)
52826846
NB
141{
142 /*
143 * Every few seconds, scan every md device looking for changes
144 * When a change is found, log it, possibly run the alert command,
145 * and possibly send Email
146 *
147 * For each array, we record:
148 * Update time
149 * active/working/failed/spare drives
150 * State of each device.
e0d19036 151 * %rebuilt if rebuilding
52826846
NB
152 *
153 * If the update time changes, check out all the data again
154 * It is possible that we cannot get the state of each device
155 * due to bugs in the md kernel module.
e0d19036
NB
156 * We also read /proc/mdstat to get rebuild percent,
157 * and to get state on all active devices incase of kernel bug.
52826846 158 *
e0d19036
NB
159 * Events are:
160 * Fail
161 * An active device had Faulty set or Active/Sync removed
162 * FailSpare
163 * A spare device had Faulty set
164 * SpareActive
165 * An active device had a reverse transition
166 * RebuildStarted
167 * percent went from -1 to +ve
9a36a9b7
ZB
168 * RebuildNN
169 * percent went from below to not-below NN%
e0d19036
NB
170 * DeviceDisappeared
171 * Couldn't access a device which was previously visible
52826846
NB
172 *
173 * if we detect an array with active<raid and spare==0
174 * we look at other arrays that have same spare-group
175 * If we find one with active==raid and spare>0,
176 * and if we can get_disk_info and find a name
177 * Then we hot-remove and hot-add to the other array
178 *
f40ac0e7 179 * If devlist is NULL, then we can monitor everything if --scan
e0d19036
NB
180 * was given. We get an initial list from config file and add anything
181 * that appears in /proc/mdstat
52826846
NB
182 */
183
2e0172b1 184 struct state *statelist = NULL;
52826846 185 int finished = 0;
e0d19036 186 struct mdstat_ent *mdstat = NULL;
72362f18 187 char *mailfrom;
9f3dd454 188 struct mddev_ident *mdlist;
af3396da 189 int delay_for_event = c->delay;
e0d19036 190
f40ac0e7
BK
191 if (devlist && c->scan) {
192 pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
193 return 1;
194 }
195
f5ff2988 196 if (!mailaddr)
8aec876d 197 mailaddr = conf_get_mailaddr();
4948b8f7 198
f5ff2988 199 if (!alert_cmd)
8aec876d 200 alert_cmd = conf_get_program();
f5ff2988
KT
201
202 mailfrom = conf_get_mailfrom();
203
95c50205 204 if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
e7b84f9d 205 pr_err("No mail address or alert command - not monitoring.\n");
d013a55e 206 return 1;
56eedc1a 207 }
f5ff2988
KT
208
209 if (c->verbose) {
210 pr_err("Monitor is started with delay %ds\n", c->delay);
211 if (mailaddr)
212 pr_err("Monitor using email address %s\n", mailaddr);
213 if (alert_cmd)
214 pr_err("Monitor using program %s\n", alert_cmd);
215 }
216
e0bd6a96
N
217 info.alert_cmd = alert_cmd;
218 info.mailaddr = mailaddr;
219 info.mailfrom = mailfrom;
220 info.dosyslog = dosyslog;
b3015166
MG
221 info.test = c->test;
222
21e622f2 223 if (s_gethostname(info.hostname, sizeof(info.hostname)) != 0) {
b3015166
MG
224 pr_err("Cannot get hostname.\n");
225 return 1;
226 }
d013a55e 227
7f3b2d1d 228 if (share){
0a07dea8 229 if (check_one_sharer(c->scan) == 2)
7f3b2d1d
BK
230 return 1;
231 }
232
44d337f0
AC
233 if (daemonise) {
234 int rv = make_daemon(pidfile);
235 if (rv >= 0)
236 return rv;
237 }
e0d19036 238
1011e834 239 if (share)
b6a84d4e
MG
240 if (write_autorebuild_pid() != 0)
241 return 1;
edde9560 242
e0d19036 243 if (devlist == NULL) {
9f3dd454 244 mdlist = conf_get_ident(NULL);
f566ef45 245 for (; mdlist; mdlist = mdlist->next) {
fe056d1f 246 struct state *st;
f566ef45 247
fe056d1f
N
248 if (mdlist->devname == NULL)
249 continue;
7b3b691b 250 if (is_devname_ignore(mdlist->devname) == true)
112cace6 251 continue;
8b668d4a
LF
252 if (!is_mddev(mdlist->devname))
253 continue;
e702f392 254
503975b9 255 st = xcalloc(1, sizeof *st);
b9ce7ab0
MT
256 snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), DEV_MD_DIR "%s",
257 basename(mdlist->devname));
e0d19036 258 st->next = statelist;
4dd2df09 259 st->devnm[0] = 0;
9dad51d4 260 st->percent = RESYNC_UNKNOWN;
f1661bd7 261 st->from_config = 1;
feb716e9 262 st->expected_spares = mdlist->spare_disks;
e0d19036 263 if (mdlist->spare_group)
503975b9 264 st->spare_group = xstrdup(mdlist->spare_group);
e0d19036
NB
265 statelist = st;
266 }
267 } else {
a655e550 268 struct mddev_dev *dv;
f566ef45
JS
269
270 for (dv = devlist; dv; dv = dv->next) {
e702f392
KT
271 struct state *st;
272
8b668d4a
LF
273 if (!is_mddev(dv->devname))
274 continue;
e702f392
KT
275
276 st = xcalloc(1, sizeof *st);
9f3dd454 277 mdlist = conf_get_ident(dv->devname);
b9ce7ab0 278 snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), "%s", dv->devname);
e0d19036 279 st->next = statelist;
4dd2df09 280 st->devnm[0] = 0;
9dad51d4 281 st->percent = RESYNC_UNKNOWN;
feb716e9 282 st->expected_spares = -1;
e5329c37
NB
283 if (mdlist) {
284 st->expected_spares = mdlist->spare_disks;
285 if (mdlist->spare_group)
503975b9 286 st->spare_group = xstrdup(mdlist->spare_group);
e5329c37 287 }
e0d19036
NB
288 statelist = st;
289 }
290 }
291
f566ef45 292 while (!finished) {
aa88f531 293 int new_found = 0;
73ff0732 294 struct state *st, **stp;
a90e1050 295 int anydegraded = 0;
007087d0 296 int anyredundant = 0;
e0d19036
NB
297
298 if (mdstat)
299 free_mdstat(mdstat);
f566ef45 300 mdstat = mdstat_read(oneshot ? 0 : 1, 0);
e0d19036 301
007087d0 302 for (st = statelist; st; st = st->next) {
b3015166 303 if (check_array(st, mdstat, increments, c->prefer))
a90e1050 304 anydegraded = 1;
007087d0
MT
305 /* for external arrays, metadata is filled for
306 * containers only
307 */
308 if (st->metadata && st->metadata->ss->external)
309 continue;
310 if (st->err == 0 && !anyredundant)
311 anyredundant = 1;
312 }
1011e834 313
e0d19036 314 /* now check if there are any new devices found in mdstat */
95c50205 315 if (c->scan)
b3015166 316 new_found = add_new_arrays(mdstat, &statelist);
2e0172b1 317
e0d19036
NB
318 /* If an array has active < raid && spare == 0 && spare_group != NULL
319 * Look for another array with spare > 0 and active == raid and same spare_group
49b69533 320 * if found, choose a device and hotremove/hotadd
e0d19036 321 */
a90e1050 322 if (share && anydegraded)
b3015166 323 try_spare_migration(statelist);
aa88f531
NB
324 if (!new_found) {
325 if (oneshot)
326 break;
9935cf0f 327 if (!anyredundant) {
8c80d305 328 pr_err("No array with redundancy detected, stopping\n");
007087d0
MT
329 break;
330 }
9935cf0f
MG
331
332 wait_for_events(&delay_for_event, c->delay);
aa88f531 333 }
b3015166 334 info.test = 0;
73ff0732
N
335
336 for (stp = &statelist; (st = *stp) != NULL; ) {
337 if (st->from_auto && st->err > 5) {
338 *stp = st->next;
55c10e4d
PB
339 if (st->spare_group)
340 free(st->spare_group);
341
73ff0732
N
342 free(st);
343 } else
344 stp = &st->next;
345 }
52826846 346 }
55c10e4d
PB
347
348 free_statelist(statelist);
b657208c 349
b5e64645
NB
350 if (pidfile)
351 unlink(pidfile);
52826846
NB
352 return 0;
353}
354
9935cf0f
MG
355/*
356 * wait_for_events() - Waits for events on md devices.
357 * @delay_for_event: pointer to current event delay
358 * @c_delay: delay from config
359 */
360static void wait_for_events(int *delay_for_event, int c_delay)
361{
362#ifndef NO_LIBUDEV
363 if (udev_is_available()) {
364 if (udev_wait_for_events(*delay_for_event) == UDEV_STATUS_ERROR)
365 pr_err("Error while waiting for udev events.\n");
366 return;
367 }
368#endif
369 wait_for_events_mdstat(delay_for_event, c_delay);
370}
371
372/*
373 * wait_for_events_mdstat() - Waits for events on mdstat.
374 * @delay_for_event: pointer to current event delay
375 * @c_delay: delay from config
376 */
377static void wait_for_events_mdstat(int *delay_for_event, int c_delay)
378{
379 int wait_result = mdstat_wait(*delay_for_event);
380
381 if (wait_result < 0) {
382 pr_err("Error while waiting for events on mdstat.\n");
383 return;
384 }
385
386 /*
387 * Give chance to process new device
388 */
389 if (wait_result != 0) {
390 if (c_delay > FALLBACK_DELAY)
391 *delay_for_event = FALLBACK_DELAY;
392 } else {
393 *delay_for_event = c_delay;
394 }
395 mdstat_close();
396}
397
2e0172b1
N
/*
 * make_daemon() - Forks the monitor into the background.
 * @pidfile: file to store the child's pid in, NULL to print the pid on stdout
 *
 * Return:
 *  -1 in the forked daemon,
 *   0 in the parent,
 *   1 on error,
 * so a non-negative value becomes the exit code.
 */
static int make_daemon(char *pidfile)
{
	FILE *pid_stream = NULL;
	int child = fork();
	int fd;

	if (child < 0) {
		perror("daemonise");
		return 1;
	}

	if (child == 0) {
		/* the daemon itself */
		manage_fork_fds(0);
		setsid();
		return -1;
	}

	/* parent: publish the child's pid, then report success */
	if (!pidfile) {
		printf("%d\n", child);
		return 0;
	}

	fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd >= 0)
		pid_stream = fdopen(fd, "w");

	if (!pid_stream) {
		/* not fatal: the daemon is already running */
		perror("cannot create pid file");
		return 0;
	}

	fprintf(pid_stream,"%d\n", child);
	fclose(pid_stream);
	return 0;
}
433
0a07dea8
MG
434/*
435 * check_one_sharer() - Checks for other mdmon processes running.
436 *
437 * Return:
438 * 0 - no other processes running,
439 * 1 - warning,
440 * 2 - error, or when scan mode is enabled, and one mdmon process already exists
441 */
2e0172b1
N
442static int check_one_sharer(int scan)
443{
185ec439 444 int pid;
0a07dea8 445 FILE *fp, *comm_fp;
7f3b2d1d 446 char comm_path[PATH_MAX];
0a07dea8
MG
447 char comm[TASK_COMM_LEN];
448
449 if (!is_directory(MDMON_DIR)) {
450 pr_err("%s is not a regular directory.\n", MDMON_DIR);
451 return 2;
452 }
453
0a07dea8
MG
454 fp = fopen(AUTOREBUILD_PID_PATH, "r");
455 if (!fp) {
3c3ddeec
MK
456 /* PID file does not exist */
457 if (errno == ENOENT)
458 return 0;
459
0a07dea8
MG
460 pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH);
461 return 2;
462 }
463
b7d78371
MK
464 if (!is_file(AUTOREBUILD_PID_PATH)) {
465 pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH);
3c3ddeec 466 fclose(fp);
b7d78371
MK
467 return 2;
468 }
469
0a07dea8
MG
470 if (fscanf(fp, "%d", &pid) != 1) {
471 pr_err("Cannot read pid from %s file.\n", AUTOREBUILD_PID_PATH);
472 fclose(fp);
473 return 2;
474 }
475
476 snprintf(comm_path, sizeof(comm_path), "/proc/%d/comm", pid);
477
478 comm_fp = fopen(comm_path, "r");
479 if (!comm_fp) {
480 dprintf("Warning: Cannot open %s, continuing\n", comm_path);
481 fclose(fp);
482 return 1;
483 }
484
485 if (fscanf(comm_fp, "%15s", comm) == 0) {
486 dprintf("Warning: Cannot read comm from %s, continuing\n", comm_path);
487 fclose(comm_fp);
488 fclose(fp);
489 return 1;
490 }
491
492 if (strncmp(basename(comm), Name, strlen(Name)) == 0) {
493 if (scan) {
494 pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
185ec439 495 fclose(comm_fp);
0a07dea8
MG
496 fclose(fp);
497 return 2;
2e0172b1 498 }
0a07dea8 499 pr_err("Warning: One autorebuild process already running.\n");
2e0172b1 500 }
0a07dea8
MG
501 fclose(comm_fp);
502 fclose(fp);
7f3b2d1d
BK
503 return 0;
504}
505
b6a84d4e
MG
506/*
507 * write_autorebuild_pid() - Writes pid to autorebuild.pid file.
508 *
509 * Return: 0 on success, 1 on error
510 */
511static int write_autorebuild_pid(void)
7f3b2d1d 512{
b6a84d4e
MG
513 FILE *fp;
514 int fd;
7f3b2d1d 515
ca4b156b 516 if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
b6a84d4e
MG
517 pr_err("%s: %s\n", strerror(errno), MDMON_DIR);
518 return 1;
519 }
520
521 if (!is_directory(MDMON_DIR)) {
522 pr_err("%s is not a regular directory.\n", MDMON_DIR);
523 return 1;
524 }
ca4b156b 525
b6a84d4e 526 fd = open(AUTOREBUILD_PID_PATH, O_WRONLY | O_CREAT | O_TRUNC, 0700);
ca4b156b 527
b6a84d4e
MG
528 if (fd < 0) {
529 pr_err("Error opening %s file.\n", AUTOREBUILD_PID_PATH);
530 return 1;
531 }
532
533 fp = fdopen(fd, "w");
534
535 if (!fp) {
536 pr_err("Error opening fd for %s file.\n", AUTOREBUILD_PID_PATH);
537 return 1;
2e0172b1 538 }
b6a84d4e
MG
539
540 fprintf(fp, "%d\n", getpid());
541
542 fclose(fp);
543 return 0;
2e0172b1 544}
52826846 545
cc3df167
MG
546#define BASE_MESSAGE "%s event detected on md device %s"
547#define COMPONENT_DEVICE_MESSAGE ", component device %s"
548#define DESCRIPTION_MESSAGE ": %s"
549/*
550 * sprint_event_message() - Writes basic message about detected event to destination ptr.
551 * @dest: message destination, should be at least the size of BUFSIZ
552 * @data: event data
553 *
554 * Return: 0 on success, 1 on error
555 */
556static int sprint_event_message(char *dest, const struct event_data *data)
557{
558 if (!dest || !data)
559 return 1;
560
561 if (data->disc && data->description)
562 snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE DESCRIPTION_MESSAGE,
563 data->event_name, data->dev, data->disc, data->description);
564 else if (data->disc)
565 snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE,
566 data->event_name, data->dev, data->disc);
567 else if (data->description)
568 snprintf(dest, BUFSIZ, BASE_MESSAGE DESCRIPTION_MESSAGE,
569 data->event_name, data->dev, data->description);
570 else
571 snprintf(dest, BUFSIZ, BASE_MESSAGE, data->event_name, data->dev);
572
573 return 0;
574}
575
576/*
577 * get_syslog_event_priority() - Determines event priority.
578 * @event_enum: event to be checked
579 *
580 * Return: LOG_CRIT, LOG_WARNING or LOG_INFO
581 */
582static int get_syslog_event_priority(const enum event event_enum)
583{
584 if (event_enum > __SYSLOG_PRIORITY_CRITICAL)
585 return LOG_CRIT;
586 if (event_enum > __SYSLOG_PRIORITY_WARNING)
587 return LOG_WARNING;
588 return LOG_INFO;
589}
590
591/*
592 * is_email_event() - Determines whether email for event should be sent or not.
593 * @event_enum: event to be checked
594 *
595 * Return: true if email should be sent, false otherwise
596 */
597static bool is_email_event(const enum event event_enum)
598{
599 static const enum event email_events[] = {
600 EVENT_FAIL,
601 EVENT_FAIL_SPARE,
602 EVENT_DEGRADED_ARRAY,
603 EVENT_SPARES_MISSING,
604 EVENT_TEST_MESSAGE
605 };
606 unsigned int i;
607
608 for (i = 0; i < ARRAY_SIZE(email_events); ++i) {
609 if (event_enum == email_events[i])
610 return true;
611 }
612 return false;
613}
614
615/*
616 * execute_alert_cmd() - Forks and executes command provided as alert_cmd.
617 * @data: event data
618 */
619static void execute_alert_cmd(const struct event_data *data)
36988671
MG
620{
621 int pid = fork();
622
623 switch (pid) {
624 default:
625 waitpid(pid, NULL, 0);
626 break;
627 case -1:
628 pr_err("Cannot fork to execute alert command");
629 break;
630 case 0:
cc3df167 631 execl(info.alert_cmd, info.alert_cmd, data->event_name, data->dev, data->disc, NULL);
36988671
MG
632 exit(2);
633 }
634}
635
cc3df167
MG
636/*
637 * send_event_email() - Sends an email about event detected by monitor.
638 * @data: event data
639 */
640static void send_event_email(const struct event_data *data)
36988671
MG
641{
642 FILE *mp, *mdstat;
36988671
MG
643 char buf[BUFSIZ];
644 int n;
645
646 mp = popen(Sendmail, "w");
647 if (!mp) {
648 pr_err("Cannot open pipe stream for sendmail.\n");
649 return;
650 }
651
36988671 652 signal(SIGPIPE, SIG_IGN);
b3015166
MG
653 if (info.mailfrom)
654 fprintf(mp, "From: %s\n", info.mailfrom);
36988671
MG
655 else
656 fprintf(mp, "From: %s monitoring <root>\n", Name);
b3015166 657 fprintf(mp, "To: %s\n", info.mailaddr);
cc3df167
MG
658 fprintf(mp, "Subject: %s event on %s:%s\n\n", data->event_name, data->dev, info.hostname);
659 fprintf(mp, "This is an automatically generated mail message.\n");
660 fprintf(mp, "%s\n", data->message);
36988671
MG
661
662 mdstat = fopen("/proc/mdstat", "r");
663 if (!mdstat) {
664 pr_err("Cannot open /proc/mdstat\n");
665 pclose(mp);
666 return;
667 }
668
669 fprintf(mp, "The /proc/mdstat file currently contains the following:\n\n");
670 while ((n = fread(buf, 1, sizeof(buf), mdstat)) > 0)
671 n = fwrite(buf, 1, n, mp);
672 fclose(mdstat);
673 pclose(mp);
674}
675
cc3df167
MG
676/*
677 * log_event_to_syslog() - Logs an event into syslog.
678 * @data: event data
679 */
680static void log_event_to_syslog(const struct event_data *data)
52826846 681{
773135f5 682 int priority;
cc3df167
MG
683
684 priority = get_syslog_event_priority(data->event_enum);
685
686 syslog(priority, "%s\n", data->message);
36988671 687}
773135f5 688
cc3df167
MG
689/*
690 * alert() - Alerts about the monitor event.
691 * @event_enum: event to be sent
692 * @description: event description
693 * @progress: rebuild progress
694 * @dev: md device name
695 * @disc: component device
696 *
697 * If needed function executes alert command, sends an email or logs event to syslog.
698 */
699static void alert(const enum event event_enum, const char *description, const uint8_t progress,
700 const char *dev, const char *disc)
36988671 701{
cc3df167
MG
702 struct event_data data = {.dev = dev, .disc = disc, .description = description};
703
704 if (!dev)
705 return;
aba69144 706
50232a6e 707 if (event_enum == EVENT_REBUILD) {
cc3df167 708 snprintf(data.event_name, sizeof(data.event_name), "%s%02d",
50232a6e
MG
709 map_num_s(events_map, EVENT_REBUILD), progress);
710 } else {
cc3df167 711 snprintf(data.event_name, sizeof(data.event_name), "%s", map_num_s(events_map, event_enum));
cd29a5c8 712 }
50232a6e 713
cc3df167 714 data.event_enum = event_enum;
36988671 715
cc3df167
MG
716 if (sprint_event_message(data.message, &data) != 0) {
717 pr_err("Cannot create event message.\n");
718 return;
52826846 719 }
cc3df167
MG
720 pr_err("%s\n", data.message);
721
722 if (info.alert_cmd)
723 execute_alert_cmd(&data);
724
725 if (info.mailaddr && is_email_event(event_enum))
726 send_event_email(&data);
773135f5 727
b3015166 728 if (info.dosyslog)
cc3df167 729 log_event_to_syslog(&data);
52826846 730}
b90c0e9a 731
a90e1050 732static int check_array(struct state *st, struct mdstat_ent *mdstat,
c2ecf5f6 733 int increments, char *prefer)
2e0172b1 734{
ff044d6b
AC
735 /* Update the state 'st' to reflect any changes shown in mdstat,
736 * or found by directly examining the array, and return
737 * '1' if the array is degraded, or '0' if it is optimal (or dead).
738 */
b3015166 739 struct { int state, major, minor; } disks_info[MAX_DISKS];
aed5f5c3 740 struct mdinfo *sra = NULL;
2e0172b1
N
741 mdu_array_info_t array;
742 struct mdstat_ent *mse = NULL, *mse2;
743 char *dev = st->devname;
b9a0309c 744 int fd;
2e0172b1 745 int i;
9e6d9291
N
746 int remaining_disks;
747 int last_disk;
721b662b 748 int new_array = 0;
1830e74b 749 int retval;
802961a2 750 int is_container = 0;
b3ab4e4d 751 unsigned long redundancy_only_flags = 0;
2e0172b1 752
b3015166 753 if (info.test)
cc3df167 754 alert(EVENT_TEST_MESSAGE, NULL, 0, dev, NULL);
b9a0309c 755
1830e74b
JS
756 retval = 0;
757
2e0172b1 758 fd = open(dev, O_RDONLY);
13e5d845
JS
759 if (fd < 0)
760 goto disappeared;
b9a0309c 761
802961a2 762 if (st->devnm[0] == 0)
84d969be 763 snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
802961a2
MT
764
765 for (mse2 = mdstat; mse2; mse2 = mse2->next)
766 if (strcmp(mse2->devnm, st->devnm) == 0) {
767 mse2->devnm[0] = 0; /* flag it as "used" */
768 mse = mse2;
769 }
770
771 if (!mse) {
772 /* duplicated array in statelist
773 * or re-created after reading mdstat
774 */
775 st->err++;
776 goto out;
777 }
778
779 if (mse->level == NULL)
780 is_container = 1;
781
2dab69c9 782 if (!is_container && !md_array_active(fd))
13e5d845 783 goto disappeared;
b9a0309c 784
2e0172b1 785 fcntl(fd, F_SETFD, FD_CLOEXEC);
13e5d845
JS
786 if (md_get_array_info(fd, &array) < 0)
787 goto disappeared;
788
b3ab4e4d
MT
789 if (!is_container && map_name(pers, mse->level) > 0)
790 redundancy_only_flags |= GET_MISMATCH;
802961a2
MT
791
792 sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
b3ab4e4d 793 GET_STATE | redundancy_only_flags);
aed5f5c3 794
aed5f5c3
JS
795 if (!sra)
796 goto disappeared;
797
2e0172b1
N
798 /* It's much easier to list what array levels can't
799 * have a device disappear than all of them that can
800 */
48bc2ade 801 if (sra->array.level == 0 || sra->array.level == -1) {
f1661bd7 802 if (!st->err && !st->from_config)
cc3df167 803 alert(EVENT_DEVICE_DISAPPEARED, "Wrong-Level", 0, dev, NULL);
73ff0732 804 st->err++;
1830e74b 805 goto out;
2e0172b1 806 }
2e0172b1 807
2e0172b1
N
808 /* this array is in /proc/mdstat */
809 if (array.utime == 0)
810 /* external arrays don't update utime, so
811 * just make sure it is always different. */
812 array.utime = st->utime + 1;;
813
0f760384 814 if (st->err) {
73ff0732 815 /* New array appeared where previously had an error */
0f760384
N
816 st->err = 0;
817 st->percent = RESYNC_NONE;
721b662b 818 new_array = 1;
007087d0 819 if (!is_container)
cc3df167 820 alert(EVENT_NEW_ARRAY, NULL, 0, st->devname, NULL);
0f760384
N
821 }
822
b8e5713c 823 if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
e5eb6857 824 st->working == sra->array.working_disks &&
b98943a4 825 st->spare == sra->array.spare_disks &&
d7be7d87 826 (mse == NULL || (mse->percent == st->percent))) {
ff044d6b 827 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
828 retval = 1;
829 goto out;
2e0172b1
N
830 }
831 if (st->utime == 0 && /* new array */
f27904a5 832 mse->pattern && strchr(mse->pattern, '_') /* degraded */)
cc3df167 833 alert(EVENT_DEGRADED_ARRAY, NULL, 0, dev, NULL);
2e0172b1 834
b98943a4
JS
835 if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
836 sra->array.spare_disks < st->expected_spares)
cc3df167 837 alert(EVENT_SPARES_MISSING, NULL, 0, dev, NULL);
9dad51d4 838 if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
2e0172b1 839 mse->percent >= 0)
cc3df167 840 alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
f27904a5 841 if (st->percent >= 0 && mse->percent >= 0 &&
2e0172b1 842 (mse->percent / increments) > (st->percent / increments)) {
2e0172b1 843 if((mse->percent / increments) == 0)
cc3df167 844 alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
2e0172b1 845 else
cc3df167 846 alert(EVENT_REBUILD, NULL, mse->percent, dev, NULL);
2e0172b1
N
847 }
848
f27904a5 849 if (mse->percent == RESYNC_NONE && st->percent >= 0) {
2e0172b1
N
850 /* Rebuild/sync/whatever just finished.
851 * If there is a number in /mismatch_cnt,
852 * we should report that.
853 */
2e0172b1 854 if (sra && sra->mismatch_cnt > 0) {
8453f8d0
LD
855 char cnt[80];
856 snprintf(cnt, sizeof(cnt),
857 " mismatches found: %d (on raid level %d)",
f566ef45 858 sra->mismatch_cnt, sra->array.level);
cc3df167 859 alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, cnt);
2e0172b1 860 } else
cc3df167 861 alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, NULL);
2e0172b1
N
862 }
863 st->percent = mse->percent;
864
b98943a4 865 remaining_disks = sra->array.nr_disks;
f27904a5 866 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
2e0172b1
N
867 mdu_disk_info_t disc;
868 disc.number = i;
d97572f5 869 if (md_get_disk_info(fd, &disc) >= 0) {
b3015166
MG
870 disks_info[i].state = disc.state;
871 disks_info[i].major = disc.major;
872 disks_info[i].minor = disc.minor;
9e6d9291
N
873 if (disc.major || disc.minor)
874 remaining_disks --;
2e0172b1 875 } else
b3015166 876 disks_info[i].major = disks_info[i].minor = 0;
2e0172b1 877 }
9e6d9291 878 last_disk = i;
2e0172b1 879
4019ad07
JL
880 if (mse->metadata_version &&
881 strncmp(mse->metadata_version, "external:", 9) == 0 &&
4dd2df09
N
882 is_subarray(mse->metadata_version+9)) {
883 char *sl;
84d969be 884 snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
4dd2df09
N
885 sl = strchr(st->parent_devnm, '/');
886 if (sl)
887 *sl = 0;
888 } else
889 st->parent_devnm[0] = 0;
f27904a5 890 if (st->metadata == NULL && st->parent_devnm[0] == 0)
2e0172b1
N
891 st->metadata = super_by_fd(fd, NULL);
892
f566ef45
JS
893 for (i = 0; i < MAX_DISKS; i++) {
894 mdu_disk_info_t disc = {0, 0, 0, 0, 0};
895 int newstate = 0;
2e0172b1
N
896 int change;
897 char *dv = NULL;
898 disc.number = i;
b3015166
MG
899 if (i < last_disk && (disks_info[i].major || disks_info[i].minor)) {
900 newstate = disks_info[i].state;
901 dv = map_dev_preferred(disks_info[i].major, disks_info[i].minor, 1,
f27904a5 902 prefer);
2e0172b1 903 disc.state = newstate;
b3015166
MG
904 disc.major = disks_info[i].major;
905 disc.minor = disks_info[i].minor;
721b662b
N
906 } else
907 newstate = (1 << MD_DISK_REMOVED);
908
2e0172b1 909 if (dv == NULL && st->devid[i])
f27904a5
JS
910 dv = map_dev_preferred(major(st->devid[i]),
911 minor(st->devid[i]), 1, prefer);
2e0172b1 912 change = newstate ^ st->devstate[i];
721b662b 913 if (st->utime && change && !st->err && !new_array) {
f27904a5 914 if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
cc3df167 915 alert(EVENT_FAIL, NULL, 0, dev, dv);
f27904a5 916 else if ((newstate & (1 << MD_DISK_FAULTY)) &&
2e0172b1 917 (disc.major || disc.minor) &&
f27904a5
JS
918 st->devid[i] == makedev(disc.major,
919 disc.minor))
cc3df167 920 alert(EVENT_FAIL_SPARE, NULL, 0, dev, dv);
f27904a5 921 else if ((newstate&change) & (1 << MD_DISK_SYNC))
cc3df167 922 alert(EVENT_SPARE_ACTIVE, NULL, 0, dev, dv);
2e0172b1
N
923 }
924 st->devstate[i] = newstate;
925 st->devid[i] = makedev(disc.major, disc.minor);
926 }
b98943a4 927 st->active = sra->array.active_disks;
e5eb6857 928 st->working = sra->array.working_disks;
b98943a4 929 st->spare = sra->array.spare_disks;
b8e5713c 930 st->failed = sra->array.failed_disks;
2e0172b1 931 st->utime = array.utime;
12a9d21f 932 st->raid = sra->array.raid_disks;
2e0172b1 933 st->err = 0;
a90e1050 934 if ((st->active < st->raid) && st->spare == 0)
1830e74b
JS
935 retval = 1;
936
937 out:
aed5f5c3
JS
938 if (sra)
939 sysfs_free(sra);
802961a2 940 if (fd >= 0)
13e5d845 941 close(fd);
1830e74b 942 return retval;
13e5d845
JS
943
944 disappeared:
007087d0 945 if (!st->err && !is_container)
cc3df167 946 alert(EVENT_DEVICE_DISAPPEARED, NULL, 0, dev, NULL);
13e5d845
JS
947 st->err++;
948 goto out;
2e0172b1
N
949}
950
b3015166 951static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
2e0172b1
N
952{
953 struct mdstat_ent *mse;
954 int new_found = 0;
1e08717f 955 char *name;
2e0172b1 956
f566ef45 957 for (mse = mdstat; mse; mse = mse->next)
d7be7d87 958 if (mse->devnm[0] && (!mse->level || /* retrieve containers */
f566ef45
JS
959 (strcmp(mse->level, "raid0") != 0 &&
960 strcmp(mse->level, "linear") != 0))) {
503975b9 961 struct state *st = xcalloc(1, sizeof *st);
2e0172b1
N
962 mdu_array_info_t array;
963 int fd;
1e08717f
SV
964
965 name = get_md_name(mse->devnm);
966 if (!name) {
967 free(st);
968 continue;
969 }
970
b9ce7ab0 971 snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), "%s", name);
2e0172b1 972 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
9cd39f01 973 md_get_array_info(fd, &array) < 0) {
2e0172b1 974 /* no such array */
9cd39f01
JS
975 if (fd >= 0)
976 close(fd);
2e0172b1 977 put_md_name(st->devname);
2e0172b1
N
978 if (st->metadata) {
979 st->metadata->ss->free_super(st->metadata);
980 free(st->metadata);
981 }
982 free(st);
983 continue;
984 }
985 close(fd);
83f3bc5f 986 st->next = *statelist;
2e0172b1 987 st->err = 1;
73ff0732 988 st->from_auto = 1;
84d969be 989 snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
9dad51d4 990 st->percent = RESYNC_UNKNOWN;
2e0172b1 991 st->expected_spares = -1;
eb28e119 992 if (mse->metadata_version &&
f566ef45
JS
993 strncmp(mse->metadata_version,
994 "external:", 9) == 0 &&
4dd2df09
N
995 is_subarray(mse->metadata_version+9)) {
996 char *sl;
84d969be
KT
997 snprintf(st->parent_devnm, MD_NAME_MAX,
998 "%s", mse->metadata_version + 10);
4dd2df09
N
999 sl = strchr(st->parent_devnm, '/');
1000 *sl = 0;
1001 } else
1002 st->parent_devnm[0] = 0;
83f3bc5f 1003 *statelist = st;
b3015166 1004 if (info.test)
cc3df167 1005 alert(EVENT_TEST_MESSAGE, NULL, 0, st->devname, NULL);
2e0172b1
N
1006 new_found = 1;
1007 }
1008 return new_found;
1009}
1010
5ec0f373 1011static int check_donor(struct state *from, struct state *to)
2feb22ef 1012{
66f5c4b6
N
1013 struct state *sub;
1014
2feb22ef
N
1015 if (from == to)
1016 return 0;
66f5c4b6
N
1017 if (from->parent)
1018 /* Cannot move from a member */
2feb22ef 1019 return 0;
ff044d6b
AC
1020 if (from->err)
1021 return 0;
66f5c4b6
N
1022 for (sub = from->subarray; sub; sub = sub->subarray)
1023 /* If source array has degraded subarrays, don't
1024 * remove anything
1025 */
1026 if (sub->active < sub->raid)
1027 return 0;
1028 if (from->metadata->ss->external == 0)
1029 if (from->active < from->raid)
1030 return 0;
2feb22ef
N
1031 if (from->spare <= 0)
1032 return 0;
e78dda3b 1033 return 1;
2feb22ef
N
1034}
1035
0f0749ad 1036static dev_t choose_spare(struct state *from, struct state *to,
f566ef45 1037 struct domainlist *domlist, struct spare_criteria *sc)
0fa21e85
N
1038{
1039 int d;
0f0749ad 1040 dev_t dev = 0;
0fa21e85 1041
b0599bda 1042 for (d = from->raid; !dev && d < MAX_DISKS; d++) {
f566ef45 1043 if (from->devid[d] > 0 && from->devstate[d] == 0) {
0fa21e85 1044 struct dev_policy *pol;
0fa21e85 1045
bfd76b93
CA
1046 if (to->metadata->ss->external &&
1047 test_partition_from_id(from->devid[d]))
1048 continue;
1049
51a9f2fc 1050 if (devid_matches_criteria(to->metadata, from->devid[d], sc) == false)
4b57ecf6
AO
1051 continue;
1052
4dd2df09 1053 pol = devid_policy(from->devid[d]);
0fa21e85
N
1054 if (from->spare_group)
1055 pol_add(&pol, pol_domain,
1056 from->spare_group, NULL);
f566ef45
JS
1057 if (domain_test(domlist, pol,
1058 to->metadata->ss->name) == 1)
0fa21e85
N
1059 dev = from->devid[d];
1060 dev_policy_free(pol);
1061 }
1062 }
1063 return dev;
1064}
1065
0f0749ad 1066static dev_t container_choose_spare(struct state *from, struct state *to,
f0b85306 1067 struct domainlist *domlist,
fbfdcb06 1068 struct spare_criteria *sc, int active)
5739e0d0
N
1069{
1070 /* This is similar to choose_spare, but we cannot trust devstate,
1071 * so we need to read the metadata instead
1072 */
326727d9 1073 struct mdinfo *list;
5739e0d0 1074 struct supertype *st = from->metadata;
ff044d6b 1075 int fd = open(from->devname, O_RDONLY);
5739e0d0 1076 int err;
0f0749ad 1077 dev_t dev = 0;
5739e0d0
N
1078
1079 if (fd < 0)
1080 return 0;
326727d9
AC
1081 if (!st->ss->getinfo_super_disks) {
1082 close(fd);
5739e0d0 1083 return 0;
326727d9 1084 }
1011e834 1085
5739e0d0
N
1086 err = st->ss->load_container(st, fd, NULL);
1087 close(fd);
1088 if (err)
1089 return 0;
1011e834 1090
a1e49d69
CA
1091 if (from == to) {
1092 /* We must check if number of active disks has not increased
1093 * since ioctl in main loop. mdmon may have added spare
1094 * to subarray. If so we do not need to look for more spares
1095 * so return non zero value */
1096 int active_cnt = 0;
1097 struct mdinfo *dp;
1098 list = st->ss->getinfo_super_disks(st);
1099 if (!list) {
1100 st->ss->free_super(st);
1101 return 1;
1102 }
1103 dp = list->devs;
1104 while (dp) {
f566ef45
JS
1105 if (dp->disk.state & (1 << MD_DISK_SYNC) &&
1106 !(dp->disk.state & (1 << MD_DISK_FAULTY)))
a1e49d69
CA
1107 active_cnt++;
1108 dp = dp->next;
1109 }
1110 sysfs_free(list);
1111 if (active < active_cnt) {
1112 /* Spare just activated.*/
1113 st->ss->free_super(st);
1114 return 1;
1115 }
1116 }
1117
326727d9 1118 /* We only need one spare so full list not needed */
fbfdcb06 1119 list = container_choose_spares(st, sc, domlist, from->spare_group,
326727d9
AC
1120 to->metadata->ss->name, 1);
1121 if (list) {
1122 struct mdinfo *disks = list->devs;
1123 if (disks)
1124 dev = makedev(disks->disk.major, disks->disk.minor);
1125 sysfs_free(list);
5739e0d0 1126 }
326727d9 1127 st->ss->free_super(st);
5739e0d0
N
1128 return dev;
1129}
1130
b3015166 1131static void try_spare_migration(struct state *statelist)
2e0172b1 1132{
66f5c4b6
N
1133 struct state *from;
1134 struct state *st;
c3621c0a
ML
1135
1136 link_containers_with_subarrays(statelist);
66f5c4b6 1137 for (st = statelist; st; st = st->next)
f566ef45 1138 if (st->active < st->raid && st->spare == 0 && !st->err) {
e78dda3b 1139 struct domainlist *domlist = NULL;
0c0f09cb 1140 struct spare_criteria sc = {0};
e78dda3b 1141 int d;
66f5c4b6
N
1142 struct state *to = st;
1143
4dd2df09 1144 if (to->parent_devnm[0] && !to->parent)
c0dc0ad5
CA
1145 /* subarray monitored without parent container
1146 * we can't move spares here */
1147 continue;
1011e834 1148
66f5c4b6
N
1149 if (to->parent)
1150 /* member of a container */
1151 to = to->parent;
e78dda3b 1152
f6562011
MT
1153 if (to->metadata->ss->get_spare_criteria)
1154 if (to->metadata->ss->get_spare_criteria(to->metadata, to->devname,
1155 &sc))
1156 continue;
1157
e9a2ac02
AC
1158 if (to->metadata->ss->external) {
1159 /* We must make sure there is
1160 * no suitable spare in container already.
1161 * If there is we don't add more */
1162 dev_t devid = container_choose_spare(
fbfdcb06 1163 to, to, NULL, &sc, st->active);
e9a2ac02
AC
1164 if (devid > 0)
1165 continue;
1166 }
b0599bda 1167 for (d = 0; d < MAX_DISKS; d++)
e78dda3b
N
1168 if (to->devid[d])
1169 domainlist_add_dev(&domlist,
1170 to->devid[d],
1171 to->metadata->ss->name);
1172 if (to->spare_group)
1173 domain_add(&domlist, to->spare_group);
5ec0f373
ML
1174 /*
1175 * No spare migration if the destination
1176 * has no domain. Skip this array.
1177 */
1178 if (!domlist)
1179 continue;
0fa21e85 1180 for (from=statelist ; from ; from=from->next) {
0f0749ad 1181 dev_t devid;
5ec0f373 1182 if (!check_donor(from, to))
0fa21e85 1183 continue;
5739e0d0
N
1184 if (from->metadata->ss->external)
1185 devid = container_choose_spare(
fbfdcb06 1186 from, to, domlist, &sc, 0);
5739e0d0 1187 else
f0b85306 1188 devid = choose_spare(from, to, domlist,
fbfdcb06 1189 &sc);
f566ef45
JS
1190 if (devid > 0 &&
1191 move_spare(from->devname, to->devname,
1192 devid)) {
cc3df167 1193 alert(EVENT_MOVE_SPARE, NULL, 0, to->devname, from->devname);
d52bb542
AC
1194 break;
1195 }
0fa21e85 1196 }
e78dda3b 1197 domain_free(domlist);
51a9f2fc 1198 dev_policy_free(sc.pols);
2e0172b1
N
1199 }
1200}
c3621c0a
ML
1201
1202/* search the statelist to connect external
1203 * metadata subarrays with their containers
1204 * We always completely rebuild the tree from scratch as
1205 * that is safest considering the possibility of entries
1206 * disappearing or changing.
1207 */
1208static void link_containers_with_subarrays(struct state *list)
1209{
1210 struct state *st;
1211 struct state *cont;
1212 for (st = list; st; st = st->next) {
1213 st->parent = NULL;
1214 st->subarray = NULL;
1215 }
1216 for (st = list; st; st = st->next)
4dd2df09 1217 if (st->parent_devnm[0])
c3621c0a 1218 for (cont = list; cont; cont = cont->next)
f566ef45 1219 if (!cont->err && cont->parent_devnm[0] == 0 &&
4dd2df09 1220 strcmp(cont->devnm, st->parent_devnm) == 0) {
c3621c0a
ML
1221 st->parent = cont;
1222 st->subarray = cont->subarray;
1223 cont->subarray = st;
1224 break;
1225 }
1226}
1227
55c10e4d
PB
1228/**
1229 * free_statelist() - Frees statelist.
1230 * @statelist: statelist to free
1231 */
1232static void free_statelist(struct state *statelist)
1233{
1234 struct state *tmp = NULL;
1235
1236 while (statelist) {
1237 if (statelist->spare_group)
1238 free(statelist->spare_group);
1239
1240 tmp = statelist;
1241 statelist = statelist->next;
1242 free(tmp);
1243 }
1244}
1245
b90c0e9a
NB
1246/* Not really Monitor but ... */
1247int Wait(char *dev)
1248{
4dd2df09 1249 char devnm[32];
cb91230c
TM
1250 dev_t rdev;
1251 char *tmp;
b90c0e9a 1252 int rv = 1;
d3f6cf4f 1253 int frozen_remaining = 3;
b90c0e9a 1254
cb91230c 1255 if (!stat_is_blkdev(dev, &rdev))
b90c0e9a 1256 return 2;
cb91230c
TM
1257
1258 tmp = devid2devnm(rdev);
1259 if (!tmp) {
1260 pr_err("Cannot get md device name.\n");
1261 return 2;
1262 }
1263
1264 strcpy(devnm, tmp);
b90c0e9a
NB
1265
1266 while(1) {
1267 struct mdstat_ent *ms = mdstat_read(1, 0);
1268 struct mdstat_ent *e;
1269
f566ef45 1270 for (e = ms; e; e = e->next)
4dd2df09 1271 if (strcmp(e->devnm, devnm) == 0)
b90c0e9a
NB
1272 break;
1273
d3f6cf4f 1274 if (e && e->percent == RESYNC_NONE) {
276be514
N
1275 /* We could be in the brief pause before something
1276 * starts. /proc/mdstat doesn't show that, but
1277 * sync_action does.
1278 */
1279 struct mdinfo mdi;
90fd7001 1280 char buf[SYSFS_MAX_BUF_SIZE];
dae13137
JS
1281
1282 if (sysfs_init(&mdi, -1, devnm))
1283 return 2;
276be514 1284 if (sysfs_get_str(&mdi, NULL, "sync_action",
90fd7001 1285 buf, sizeof(buf)) > 0 &&
d3f6cf4f 1286 strcmp(buf,"idle\n") != 0) {
276be514 1287 e->percent = RESYNC_UNKNOWN;
d3f6cf4f
N
1288 if (strcmp(buf, "frozen\n") == 0) {
1289 if (frozen_remaining == 0)
1290 e->percent = RESYNC_NONE;
1291 else
1292 frozen_remaining -= 1;
1293 }
1294 }
276be514 1295 }
639c3c10 1296 if (!e || e->percent == RESYNC_NONE) {
e7783ee6 1297 if (e && e->metadata_version &&
c94709e8
DW
1298 strncmp(e->metadata_version, "external:", 9) == 0) {
1299 if (is_subarray(&e->metadata_version[9]))
1300 ping_monitor(&e->metadata_version[9]);
1301 else
4dd2df09 1302 ping_monitor(devnm);
c94709e8 1303 }
b90c0e9a
NB
1304 free_mdstat(ms);
1305 return rv;
1306 }
89a10d84 1307 free_mdstat(ms);
b90c0e9a
NB
1308 rv = 0;
1309 mdstat_wait(5);
1310 }
1311}
679eb882 1312
43ebc910
GP
/* array_state words that WaitClean() treats as "nothing left to wait for".
 * "broken" is only used for RAID0/LINEAR: it is the same as "clean", but
 * reported when the array has one or more members missing.
 * The list is NULL-terminated.
 */
static char *clean_states[] = {
	"clear",
	"inactive",
	"readonly",
	"read-auto",
	"clean",
	"broken",
	NULL
};
679eb882 1318
1ea04629 1319int WaitClean(char *dev, int verbose)
679eb882
N
1320{
1321 int fd;
1322 struct mdinfo *mdi;
1323 int rv = 1;
4dd2df09 1324 char devnm[32];
679eb882 1325
9e04ac1c
ZL
1326 if (!stat_is_blkdev(dev, NULL))
1327 return 2;
679eb882
N
1328 fd = open(dev, O_RDONLY);
1329 if (fd < 0) {
1330 if (verbose)
e7b84f9d 1331 pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
679eb882
N
1332 return 1;
1333 }
1334
4dd2df09
N
1335 strcpy(devnm, fd2devnm(fd));
1336 mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
679eb882
N
1337 if (!mdi) {
1338 if (verbose)
7a862a02 1339 pr_err("Failed to read sysfs attributes for %s\n", dev);
679eb882
N
1340 close(fd);
1341 return 0;
1342 }
1343
1344 switch(mdi->array.level) {
1345 case LEVEL_LINEAR:
1346 case LEVEL_MULTIPATH:
1347 case 0:
1348 /* safemode delay is irrelevant for these levels */
1349 rv = 0;
1350 }
1351
1352 /* for internal metadata the kernel handles the final clean
1353 * transition, containers can never be dirty
1354 */
1355 if (!is_subarray(mdi->text_version))
1356 rv = 0;
1357
1358 /* safemode disabled ? */
1359 if (mdi->safe_mode_delay == 0)
1360 rv = 0;
1361
1362 if (rv) {
4dd2df09 1363 int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
90fd7001 1364 char buf[SYSFS_MAX_BUF_SIZE];
efc67e8e 1365 int delay = 5000;
679eb882
N
1366
1367 /* minimize the safe_mode_delay and prepare to wait up to 5s
1368 * for writes to quiesce
1369 */
1370 sysfs_set_safemode(mdi, 1);
679eb882
N
1371
1372 /* wait for array_state to be clean */
1373 while (1) {
1374 rv = read(state_fd, buf, sizeof(buf));
1375 if (rv < 0)
1376 break;
43ebc910
GP
1377 if (sysfs_match_word(buf, clean_states) <
1378 (int)ARRAY_SIZE(clean_states) - 1)
679eb882 1379 break;
efc67e8e 1380 rv = sysfs_wait(state_fd, &delay);
679eb882
N
1381 if (rv < 0 && errno != EINTR)
1382 break;
1383 lseek(state_fd, 0, SEEK_SET);
1384 }
1385 if (rv < 0)
1386 rv = 1;
1ea04629 1387 else if (ping_monitor(mdi->text_version) == 0) {
679eb882
N
1388 /* we need to ping to close the window between array
1389 * state transitioning to clean and the metadata being
1390 * marked clean
1391 */
1392 rv = 0;
1ea04629 1393 } else {
679eb882 1394 rv = 1;
1ea04629
MT
1395 pr_err("Error connecting monitor with %s\n", dev);
1396 }
679eb882 1397 if (rv && verbose)
f566ef45 1398 pr_err("Error waiting for %s to be clean\n", dev);
679eb882
N
1399
1400 /* restore the original safe_mode_delay */
1401 sysfs_set_safemode(mdi, mdi->safe_mode_delay);
1402 close(state_fd);
1403 }
1404
1405 sysfs_free(mdi);
1406 close(fd);
1407
1408 return rv;
1409}