]>
git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@cse.unsw.edu.au>
24 * School of Computer Science and Engineering
25 * The University of New South Wales
33 #include <sys/signal.h>
35 static void alert(char *event
, char *dev
, char *disc
, char *mailaddr
, char *cmd
);
37 int Monitor(mddev_dev_t devlist
,
38 char *mailaddr
, char *alert_cmd
,
43 * Every few seconds, scan every md device looking for changes
44 * When a change is found, log it, possibly run the alert command,
45 * and possibly send Email
47 * For each array, we record:
49 * active/working/failed/spare drives
50 * State of each device.
52 * If the update time changes, check out all the data again
53 * It is possible that we cannot get the state of each device
54 * due to bugs in the md kernel module.
56 * if active_drives decreases, generate a "Fail" event
57 * if active_drives increases, generate a "SpareActive" event
59 * if we detect an array with active<raid and spare==0
60 * we look at other arrays that have same spare-group
61 * If we find one with active==raid and spare>0,
62 * and if we can get_disk_info and find a name
63 * Then we hot-remove and hot-add to the other array
71 int active
, working
, failed
, spare
;
72 int devstate
[MD_SB_DISKS
];
77 mddev_ident_t mdlist
= NULL
;
81 mdlist
= conf_get_ident(config
, NULL
);
83 while (dv
|| mdlist
) {
84 mddev_ident_t mdident
;
86 mdu_array_info_t array
;
91 char *event_disc
= NULL
;
94 mdident
= conf_get_ident(config
, dev
);
98 dev
= mdident
->devname
;
99 mdlist
= mdlist
->next
;
101 for (st
=statelist
; st
; st
=st
->next
)
102 if (strcmp(st
->devname
, dev
)==0)
105 st
=malloc(sizeof *st
);
108 st
->devname
= strdup(dev
);
110 st
->next
= statelist
;
114 fd
= open(dev
, O_RDONLY
);
117 fprintf(stderr
, Name
": cannot open %s: %s\n",
118 dev
, strerror(errno
));
122 if (ioctl(fd
, GET_ARRAY_INFO
, &array
)<0) {
124 fprintf(stderr
, Name
": cannot get array info for %s: %s\n",
125 dev
, strerror(errno
));
132 if (st
->utime
== array
.utime
&&
133 st
->failed
== array
.failed_disks
) {
140 if (st
->active
> array
.active_disks
)
142 else if (st
->working
> array
.working_disks
)
144 else if (st
->active
< array
.active_disks
)
145 event
= "ActiveSpare";
147 for (i
=0; i
<array
.raid_disks
+array
.spare_disks
; i
++) {
148 mdu_disk_info_t disc
;
150 if (ioctl(fd
, GET_DISK_INFO
, &disc
)>= 0) {
151 if (event
&& event_disc
== NULL
&&
152 st
->devstate
[i
] != disc
.state
) {
153 char * dv
= map_dev(disc
.major
, disc
.minor
);
155 event_disc
= strdup(dv
);
157 st
->devstate
[i
] = disc
.state
;
161 st
->active
= array
.active_disks
;
162 st
->working
= array
.working_disks
;
163 st
->spare
= array
.spare_disks
;
164 st
->failed
= array
.failed_disks
;
165 st
->utime
= array
.utime
;
167 alert(event
, dev
, event_disc
, mailaddr
, alert_cmd
);
175 static void alert(char *event
, char *dev
, char *disc
, char *mailaddr
, char *cmd
)
177 if (!cmd
&& !mailaddr
) {
178 time_t now
= time(0);
180 printf("%0.15s: %s on %s %s\n", ctime(&now
)+4, event
, dev
, disc
?disc
:"unknown device");
186 waitpid(pid
, NULL
, 0);
191 execl(cmd
, cmd
, event
, dev
, disc
, NULL
);
195 if (mailaddr
&& strncmp(event
, "Fail", 4)==0) {
196 FILE *mp
= popen(Sendmail
, "w");
199 gethostname(hname
, sizeof(hname
));
200 signal(SIGPIPE
, SIG_IGN
);
201 fprintf(mp
, "From: " Name
" monitoring <root>\n");
202 fprintf(mp
, "To: %s\n", mailaddr
);
203 fprintf(mp
, "Subject: %s event on %s:%s\n\n", event
, dev
, hname
);
205 fprintf(mp
, "This is an automatically generated mail message from " Name
"\n");
206 fprintf(mp
, "running on %s\n\n", hname
);
208 fprintf(mp
, "A %s event had been detected on md device %s.\n\n", event
, dev
);
211 fprintf(mp
, "It could be related to sub-device %s.\n\n", disc
);
213 fprintf(mp
, "Faithfully yours, etc.\n");
218 /* FIXME log the event to syslog maybe */