]>
git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@cse.unsw.edu.au>
24 * School of Computer Science and Engineering
25 * The University of New South Wales
/* Forward declaration of alert(), which is defined further down in this file
 * but is called from Monitor() before that definition appears.
 *
 * event, dev and disc are the strings that get reported: they are printed to
 * stdout when neither cmd nor mailaddr is set, passed as arguments to cmd via
 * execl(), and inserted into the generated mail text.
 * mailaddr is written as the "To:" header and mailfrom as the "From:" header
 * of the message piped to Sendmail.
 * NOTE(review): dosyslog presumably gates the syslog() calls at the end of
 * alert(); the test itself is not visible in this view - confirm.
 */
38 static void alert(char *event
, char *dev
, char *disc
, char *mailaddr
, char *mailfrom
,
39 char *cmd
, int dosyslog
);
41 static char *percentalerts
[] = {
49 /* The largest number of disks current arrays can manage is 384
50 * This really should be dynamic, but that will have to wait
51 * At least it isn't MD_SB_DISKS.
54 int Monitor(mddev_dev_t devlist
,
55 char *mailaddr
, char *alert_cmd
,
56 int period
, int daemonise
, int scan
, int oneshot
,
57 int dosyslog
, int test
, char* pidfile
)
60 * Every few seconds, scan every md device looking for changes
61 * When a change is found, log it, possibly run the alert command,
62 * and possibly send Email
64 * For each array, we record:
66 * active/working/failed/spare drives
67 * State of each device.
68 * %rebuilt if rebuilding
70 * If the update time changes, check out all the data again
71 * It is possible that we cannot get the state of each device
72 * due to bugs in the md kernel module.
73 * We also read /proc/mdstat to get rebuild percent,
74 * and to get state on all active devices in case of kernel bug.
78 * An active device had Faulty set or Active/Sync removed
80 * A spare device had Faulty set
82 * An active device had a reverse transition
84 * percent went from -1 to +ve
85 * Rebuild20 Rebuild40 Rebuild60 Rebuild80
86 * percent went from below to not-below that number
88 * Couldn't access a device which was previously visible
90 * if we detect an array with active<raid and spare==0
91 * we look at other arrays that have same spare-group
92 * If we find one with active==raid and spare>0,
93 * and if we can get_disk_info and find a name
94 * Then we hot-remove and hot-add to the other array
96 * If devlist is NULL, then we can monitor everything because --scan
97 * was given. We get an initial list from config file and add anything
98 * that appears in /proc/mdstat
103 int devnum
; /* to sync with mdstat info */
107 int active
, working
, failed
, spare
, raid
;
109 int devstate
[MaxDisks
];
115 struct mdstat_ent
*mdstat
= NULL
;
116 char *mailfrom
= NULL
;
119 mailaddr
= conf_get_mailaddr();
120 if (mailaddr
&& ! scan
)
121 fprintf(stderr
, Name
": Monitor using email address \"%s\" from config file\n",
124 mailfrom
= conf_get_mailfrom();
127 alert_cmd
= conf_get_program();
128 if (alert_cmd
&& ! scan
)
129 fprintf(stderr
, Name
": Monitor using program \"%s\" from config file\n",
132 if (scan
&& !mailaddr
&& !alert_cmd
) {
133 fprintf(stderr
, Name
": No mail address or alert command - not monitoring.\n");
144 pid_file
=fopen(pidfile
, "w");
146 perror("cannot create pid file");
148 fprintf(pid_file
,"%d\n", pid
);
159 open("/dev/null", O_RDWR
);
165 if (devlist
== NULL
) {
166 mddev_ident_t mdlist
= conf_get_ident(NULL
);
167 for (; mdlist
; mdlist
=mdlist
->next
) {
169 if (mdlist
->devname
== NULL
)
171 st
= malloc(sizeof *st
);
174 st
->devname
= strdup(mdlist
->devname
);
176 st
->next
= statelist
;
178 st
->devnum
= INT_MAX
;
180 st
->expected_spares
= mdlist
->spare_disks
;
181 if (mdlist
->spare_group
)
182 st
->spare_group
= strdup(mdlist
->spare_group
);
184 st
->spare_group
= NULL
;
189 for (dv
=devlist
; dv
; dv
=dv
->next
) {
190 mddev_ident_t mdlist
= conf_get_ident(dv
->devname
);
191 struct state
*st
= malloc(sizeof *st
);
194 st
->devname
= strdup(dv
->devname
);
196 st
->next
= statelist
;
198 st
->devnum
= INT_MAX
;
200 st
->expected_spares
= -1;
201 st
->spare_group
= NULL
;
203 st
->expected_spares
= mdlist
->spare_disks
;
204 if (mdlist
->spare_group
)
205 st
->spare_group
= strdup(mdlist
->spare_group
);
218 mdstat
= mdstat_read(oneshot
?0:1, 0);
220 for (st
=statelist
; st
; st
=st
->next
) {
221 struct { int state
, major
, minor
; } info
[MaxDisks
];
222 mdu_array_info_t array
;
223 struct mdstat_ent
*mse
= NULL
, *mse2
;
224 char *dev
= st
->devname
;
229 alert("TestMessage", dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
230 fd
= open(dev
, O_RDONLY
);
233 alert("DeviceDisappeared", dev
, NULL
,
234 mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
235 /* fprintf(stderr, Name ": cannot open %s: %s\n",
236 dev, strerror(errno));
240 fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
241 if (ioctl(fd
, GET_ARRAY_INFO
, &array
)<0) {
243 alert("DeviceDisappeared", dev
, NULL
,
244 mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
245 /* fprintf(stderr, Name ": cannot get array info for %s: %s\n",
246 dev, strerror(errno));
251 /* It's much easier to list what array levels can't
252 * have a device disappear than all of them that can
254 if (array
.level
== 0 || array
.level
== -1) {
256 alert("DeviceDisappeared", dev
, "Wrong-Level",
257 mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
262 if (st
->devnum
== INT_MAX
) {
264 if (fstat(fd
, &stb
) == 0 &&
265 (S_IFMT
&stb
.st_mode
)==S_IFBLK
) {
266 if (major(stb
.st_rdev
) == MD_MAJOR
)
267 st
->devnum
= minor(stb
.st_rdev
);
269 st
->devnum
= -1- (minor(stb
.st_rdev
)>>6);
273 for (mse2
= mdstat
; mse2
; mse2
=mse2
->next
)
274 if (mse2
->devnum
== st
->devnum
) {
275 mse2
->devnum
= INT_MAX
; /* flag it as "used" */
279 if (st
->utime
== array
.utime
&&
280 st
->failed
== array
.failed_disks
&&
281 st
->working
== array
.working_disks
&&
282 st
->spare
== array
.spare_disks
&&
284 mse
->percent
== st
->percent
290 if (st
->utime
== 0 && /* new array */
291 mse
&& /* is in /proc/mdstat */
292 mse
->pattern
&& strchr(mse
->pattern
, '_') /* degraded */
294 alert("DegradedArray", dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
296 if (st
->utime
== 0 && /* new array */
297 st
->expected_spares
> 0 &&
298 array
.spare_disks
< st
->expected_spares
)
299 alert("SparesMissing", dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
303 alert("RebuildStarted", dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
307 (mse
->percent
/ 20) > (st
->percent
/ 20))
308 alert(percentalerts
[mse
->percent
/20],
309 dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
312 mse
->percent
== -1 &&
314 /* Rebuild/sync/whatever just finished.
315 * If there is a number in /mismatch_cnt,
316 * we should report that.
319 sysfs_read(-1, st
->devnum
, GET_MISMATCH
);
320 if (sra
&& sra
->mismatch_cnt
> 0) {
322 sprintf(cnt
, " mismatches found: %d", sra
->mismatch_cnt
);
323 alert("RebuildFinished", dev
, cnt
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
325 alert("RebuildFinished", dev
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
331 st
->percent
= mse
->percent
;
334 for (i
=0; i
<MaxDisks
&& i
<= array
.raid_disks
+ array
.nr_disks
;
336 mdu_disk_info_t disc
;
338 if (ioctl(fd
, GET_DISK_INFO
, &disc
) >= 0) {
339 info
[i
].state
= disc
.state
;
340 info
[i
].major
= disc
.major
;
341 info
[i
].minor
= disc
.minor
;
343 info
[i
].major
= info
[i
].minor
= 0;
347 for (i
=0; i
<MaxDisks
; i
++) {
348 mdu_disk_info_t disc
= {0};
353 if (i
> array
.raid_disks
+ array
.nr_disks
) {
355 disc
.major
= disc
.minor
= 0;
356 } else if (info
[i
].major
|| info
[i
].minor
) {
357 newstate
= info
[i
].state
;
358 dv
= map_dev(info
[i
].major
, info
[i
].minor
, 1);
359 disc
.state
= newstate
;
360 disc
.major
= info
[i
].major
;
361 disc
.minor
= info
[i
].minor
;
362 } else if (mse
&& mse
->pattern
&& i
< strlen(mse
->pattern
)) {
363 switch(mse
->pattern
[i
]) {
364 case 'U': newstate
= 6 /* ACTIVE/SYNC */; break;
365 case '_': newstate
= 0; break;
367 disc
.major
= disc
.minor
= 0;
369 if (dv
== NULL
&& st
->devid
[i
])
370 dv
= map_dev(major(st
->devid
[i
]),
371 minor(st
->devid
[i
]), 1);
372 change
= newstate
^ st
->devstate
[i
];
373 if (st
->utime
&& change
&& !st
->err
) {
374 if (i
< (unsigned)array
.raid_disks
&&
375 (((newstate
&change
)&(1<<MD_DISK_FAULTY
)) ||
376 ((st
->devstate
[i
]&change
)&(1<<MD_DISK_ACTIVE
)) ||
377 ((st
->devstate
[i
]&change
)&(1<<MD_DISK_SYNC
)))
379 alert("Fail", dev
, dv
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
380 else if (i
>= (unsigned)array
.raid_disks
&&
381 (disc
.major
|| disc
.minor
) &&
382 st
->devid
[i
] == makedev(disc
.major
, disc
.minor
) &&
383 ((newstate
&change
)&(1<<MD_DISK_FAULTY
))
385 alert("FailSpare", dev
, dv
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
386 else if (i
< (unsigned)array
.raid_disks
&&
387 (((st
->devstate
[i
]&change
)&(1<<MD_DISK_FAULTY
)) ||
388 ((newstate
&change
)&(1<<MD_DISK_ACTIVE
)) ||
389 ((newstate
&change
)&(1<<MD_DISK_SYNC
)))
391 alert("SpareActive", dev
, dv
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
393 st
->devstate
[i
] = newstate
;
394 st
->devid
[i
] = makedev(disc
.major
, disc
.minor
);
396 st
->active
= array
.active_disks
;
397 st
->working
= array
.working_disks
;
398 st
->spare
= array
.spare_disks
;
399 st
->failed
= array
.failed_disks
;
400 st
->utime
= array
.utime
;
401 st
->raid
= array
.raid_disks
;
404 /* now check if there are any new devices found in mdstat */
406 struct mdstat_ent
*mse
;
407 for (mse
=mdstat
; mse
; mse
=mse
->next
)
408 if (mse
->devnum
!= INT_MAX
&&
410 (strcmp(mse
->level
, "raid0")!=0 &&
411 strcmp(mse
->level
, "linear")!=0)
413 struct state
*st
= malloc(sizeof *st
);
414 mdu_array_info_t array
;
418 st
->devname
= strdup(get_md_name(mse
->devnum
));
419 if ((fd
= open(st
->devname
, O_RDONLY
)) < 0 ||
420 ioctl(fd
, GET_ARRAY_INFO
, &array
)< 0) {
422 if (fd
>=0) close(fd
);
423 put_md_name(st
->devname
);
430 st
->next
= statelist
;
432 st
->devnum
= mse
->devnum
;
434 st
->spare_group
= NULL
;
435 st
->expected_spares
= -1;
438 alert("TestMessage", st
->devname
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
439 alert("NewArray", st
->devname
, NULL
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
443 /* If an array has active < raid && spare == 0 && spare_group != NULL
444 * Look for another array with spare > 0 and active == raid and same spare_group
445 * if found, choose a device and hotremove/hotadd
447 for (st
= statelist
; st
; st
=st
->next
)
448 if (st
->active
< st
->raid
&&
450 st
->spare_group
!= NULL
) {
452 for (st2
=statelist
; st2
; st2
=st2
->next
)
455 st2
->active
== st2
->raid
&&
456 st2
->spare_group
!= NULL
&&
457 strcmp(st
->spare_group
, st2
->spare_group
) == 0) {
458 /* try to remove and add */
459 int fd1
= open(st
->devname
, O_RDONLY
);
460 int fd2
= open(st2
->devname
, O_RDONLY
);
463 if (fd1
< 0 || fd2
< 0) {
464 if (fd1
>=0) close(fd1
);
465 if (fd2
>=0) close(fd2
);
468 for (d
=st2
->raid
; d
< MaxDisks
; d
++) {
469 if (st2
->devid
[d
] > 0 &&
470 st2
->devstate
[d
] == 0) {
476 if (ioctl(fd2
, HOT_REMOVE_DISK
,
477 (unsigned long)dev
) == 0) {
478 if (ioctl(fd1
, HOT_ADD_DISK
,
479 (unsigned long)dev
) == 0) {
480 alert("MoveSpare", st
->devname
, st2
->devname
, mailaddr
, mailfrom
, alert_cmd
, dosyslog
);
485 else ioctl(fd2
, HOT_ADD_DISK
, (unsigned long) dev
);
506 static void alert(char *event
, char *dev
, char *disc
, char *mailaddr
, char *mailfrom
, char *cmd
,
511 if (!cmd
&& !mailaddr
) {
512 time_t now
= time(0);
514 printf("%1.15s: %s on %s %s\n", ctime(&now
)+4, event
, dev
, disc
?disc
:"unknown device");
520 waitpid(pid
, NULL
, 0);
525 execl(cmd
, cmd
, event
, dev
, disc
, NULL
);
530 (strncmp(event
, "Fail", 4)==0 ||
531 strncmp(event
, "Test", 4)==0 ||
532 strncmp(event
, "Spares", 6)==0 ||
533 strncmp(event
, "Degrade", 7)==0)) {
534 FILE *mp
= popen(Sendmail
, "w");
538 gethostname(hname
, sizeof(hname
));
539 signal(SIGPIPE
, SIG_IGN
);
541 fprintf(mp
, "From: %s\n", mailfrom
);
543 fprintf(mp
, "From: " Name
" monitoring <root>\n");
544 fprintf(mp
, "To: %s\n", mailaddr
);
545 fprintf(mp
, "Subject: %s event on %s:%s\n\n", event
, dev
, hname
);
547 fprintf(mp
, "This is an automatically generated mail message from " Name
"\n");
548 fprintf(mp
, "running on %s\n\n", hname
);
550 fprintf(mp
, "A %s event had been detected on md device %s.\n\n", event
, dev
);
552 if (disc
&& disc
[0] != ' ')
553 fprintf(mp
, "It could be related to component device %s.\n\n", disc
);
554 if (disc
&& disc
[0] == ' ')
555 fprintf(mp
, "Extra information:%s.\n\n", disc
);
557 fprintf(mp
, "Faithfully yours, etc.\n");
559 mdstat
= fopen("/proc/mdstat", "r");
563 fprintf(mp
, "\nP.S. The /proc/mdstat file currently contains the following:\n\n");
564 while ( (n
=fread(buf
, 1, sizeof(buf
), mdstat
)) > 0)
565 n
=fwrite(buf
, 1, n
, mp
); /* yes, i don't care about the result */
573 /* log the event to syslog maybe */
575 /* Log at a different severity depending on the event.
577 * These are the critical events: */
578 if (strncmp(event
, "Fail", 4)==0 ||
579 strncmp(event
, "Degrade", 7)==0 ||
580 strncmp(event
, "DeviceDisappeared", 17)==0)
582 /* Good to know about, but are not failures: */
583 else if (strncmp(event
, "Rebuild", 7)==0 ||
584 strncmp(event
, "MoveSpare", 9)==0 ||
585 strncmp(event
, "Spares", 6) != 0)
586 priority
= LOG_WARNING
;
587 /* Everything else: */
592 syslog(priority
, "%s event detected on md device %s, component device %s", event
, dev
, disc
);
594 syslog(priority
, "%s event detected on md device %s", event
, dev
);
598 /* Not really Monitor but ... */
605 if (stat(dev
, &stb
) != 0) {
606 fprintf(stderr
, Name
": Cannot find %s: %s\n", dev
,
610 devnum
= stat2devnum(&stb
);
613 struct mdstat_ent
*ms
= mdstat_read(1, 0);
614 struct mdstat_ent
*e
;
616 for (e
=ms
; e
; e
=e
->next
)
617 if (e
->devnum
== devnum
)
620 if (!e
|| e
->percent
< 0) {
621 if (e
&& e
->metadata_version
&&
622 strncmp(e
->metadata_version
, "external:", 9) == 0) {
623 if (is_subarray(&e
->metadata_version
[9]))
624 ping_monitor(&e
->metadata_version
[9]);
626 ping_monitor(devnum2devname(devnum
));
/* NULL-terminated list of array_state words. WaitClean() passes this table to
 * sysfs_match_word() together with the text it read from the "array_state"
 * sysfs file, and compares the result against 4, the index of "clean" here.
 * NOTE(review): sysfs_match_word() is not defined in this view; presumably it
 * returns the index of the matching entry - confirm before reordering.
 */
637 static char *clean_states
[] = {
638 "clear", "inactive", "readonly", "read-auto", "clean", NULL
};
640 int WaitClean(char *dev
, int verbose
)
647 fd
= open(dev
, O_RDONLY
);
650 fprintf(stderr
, Name
": Couldn't open %s: %s\n", dev
, strerror(errno
));
654 devnum
= fd2devnum(fd
);
655 mdi
= sysfs_read(fd
, devnum
, GET_VERSION
|GET_LEVEL
|GET_SAFEMODE
);
658 fprintf(stderr
, Name
": Failed to read sysfs attributes for "
664 switch(mdi
->array
.level
) {
666 case LEVEL_MULTIPATH
:
668 /* safemode delay is irrelevant for these levels */
673 /* for internal metadata the kernel handles the final clean
674 * transition, containers can never be dirty
676 if (!is_subarray(mdi
->text_version
))
679 /* safemode disabled ? */
680 if (mdi
->safe_mode_delay
== 0)
684 int state_fd
= sysfs_open(fd2devnum(fd
), NULL
, "array_state");
689 /* minimize the safe_mode_delay and prepare to wait up to 5s
690 * for writes to quiesce
692 sysfs_set_safemode(mdi
, 1);
696 /* give mdmon a chance to checkpoint resync */
697 sysfs_set_str(mdi
, NULL
, "sync_action", "idle");
701 /* wait for array_state to be clean */
703 rv
= read(state_fd
, buf
, sizeof(buf
));
706 if (sysfs_match_word(buf
, clean_states
) <= 4)
708 FD_SET(state_fd
, &fds
);
709 rv
= select(state_fd
+ 1, &fds
, NULL
, NULL
, &tm
);
710 if (rv
< 0 && errno
!= EINTR
)
712 lseek(state_fd
, 0, SEEK_SET
);
716 else if (ping_monitor(mdi
->text_version
) == 0) {
717 /* we need to ping to close the window between array
718 * state transitioning to clean and the metadata being
725 fprintf(stderr
, Name
": Error waiting for %s to be clean\n",
728 /* restore the original safe_mode_delay */
729 sysfs_set_safemode(mdi
, mdi
->safe_mode_delay
);