Replace sysarray with mdinfo
[thirdparty/mdadm.git] / Monitor.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neilb@cse.unsw.edu.au>
23  *    Paper: Neil Brown
24  *           School of Computer Science and Engineering
25  *           The University of New South Wales
26  *           Sydney, 2052
27  *           Australia
28  */
29
30 #include        "mdadm.h"
31 #include        "md_p.h"
32 #include        "md_u.h"
33 #include        <sys/wait.h>
34 #include        <signal.h>
35 #include        <limits.h>
36 #include        <syslog.h>
37
/* Forward declaration: alert() is defined after Monitor() in this file. */
static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom,
                  char *cmd, int dosyslog);
40
/* Event names for rebuild progress.  Monitor() indexes this table with
 * (percent / 20) when the percentage reported in /proc/mdstat moves into
 * a higher 20% band, so entry N names the event for reaching N*20 percent.
 */
static char *percentalerts[] = {
        "RebuildStarted",
        "Rebuild20",
        "Rebuild40",
        "Rebuild60",
        "Rebuild80",
};
48
/* The largest number of disks current arrays can manage is 384.
 * This really should be dynamic, but that will have to wait.
 * At least it isn't MD_SB_DISKS.
 */
#define MaxDisks 384
54 int Monitor(mddev_dev_t devlist,
55             char *mailaddr, char *alert_cmd,
56             int period, int daemonise, int scan, int oneshot,
57             int dosyslog, int test, char* pidfile)
58 {
59         /*
60          * Every few seconds, scan every md device looking for changes
61          * When a change is found, log it, possibly run the alert command,
62          * and possibly send Email
63          *
64          * For each array, we record:
65          *   Update time
66          *   active/working/failed/spare drives
67          *   State of each device.
68          *   %rebuilt if rebuilding
69          *
70          * If the update time changes, check out all the data again
71          * It is possible that we cannot get the state of each device
72          * due to bugs in the md kernel module.
73          * We also read /proc/mdstat to get rebuild percent,
74          * and to get state on all active devices incase of kernel bug.
75          *
76          * Events are:
77          *    Fail
78          *      An active device had Faulty set or Active/Sync removed
79          *    FailSpare
80          *      A spare device had Faulty set
81          *    SpareActive
82          *      An active device had a reverse transition
83          *    RebuildStarted
84          *      percent went from -1 to +ve
85          *    Rebuild20 Rebuild40 Rebuild60 Rebuild80
86          *      percent went from below to not-below that number
87          *    DeviceDisappeared
88          *      Couldn't access a device which was previously visible
89          *
90          * if we detect an array with active<raid and spare==0
91          * we look at other arrays that have same spare-group
92          * If we find one with active==raid and spare>0,
93          *  and if we can get_disk_info and find a name
94          *  Then we hot-remove and hot-add to the other array
95          *
96          * If devlist is NULL, then we can monitor everything because --scan
97          * was given.  We get an initial list from config file and add anything
98          * that appears in /proc/mdstat
99          */
100
101         struct state {
102                 char *devname;
103                 int devnum;     /* to sync with mdstat info */
104                 long utime;
105                 int err;
106                 char *spare_group;
107                 int active, working, failed, spare, raid;
108                 int expected_spares;
109                 int devstate[MaxDisks];
110                 int devid[MaxDisks];
111                 int percent;
112                 struct state *next;
113         } *statelist = NULL;
114         int finished = 0;
115         struct mdstat_ent *mdstat = NULL;
116         char *mailfrom = NULL;
117
118         if (!mailaddr) {
119                 mailaddr = conf_get_mailaddr();
120                 if (mailaddr && ! scan)
121                         fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
122                                mailaddr);
123         }
124         mailfrom = conf_get_mailfrom();
125
126         if (!alert_cmd) {
127                 alert_cmd = conf_get_program();
128                 if (alert_cmd && ! scan)
129                         fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
130                                alert_cmd);
131         }
132         if (scan && !mailaddr && !alert_cmd) {
133                 fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
134                 return 1;
135         }
136
137         if (daemonise) {
138                 int pid = fork();
139                 if (pid > 0) {
140                         if (!pidfile)
141                                 printf("%d\n", pid);
142                         else {
143                                 FILE *pid_file;
144                                 pid_file=fopen(pidfile, "w");
145                                 if (!pid_file)
146                                         perror("cannot create pid file");
147                                 else {
148                                         fprintf(pid_file,"%d\n", pid);
149                                         fclose(pid_file);
150                                 }
151                         }
152                         return 0;
153                 }
154                 if (pid < 0) {
155                         perror("daemonise");
156                         return 1;
157                 }
158                 close(0);
159                 open("/dev/null", 3);
160                 dup2(0,1);
161                 dup2(0,2);
162                 setsid();
163         }
164
165         if (devlist == NULL) {
166                 mddev_ident_t mdlist = conf_get_ident(NULL);
167                 for (; mdlist; mdlist=mdlist->next) {
168                         struct state *st = malloc(sizeof *st);
169                         if (st == NULL)
170                                 continue;
171                         st->devname = strdup(mdlist->devname);
172                         st->utime = 0;
173                         st->next = statelist;
174                         st->err = 0;
175                         st->devnum = INT_MAX;
176                         st->percent = -2;
177                         st->expected_spares = mdlist->spare_disks;
178                         if (mdlist->spare_group)
179                                 st->spare_group = strdup(mdlist->spare_group);
180                         else
181                                 st->spare_group = NULL;
182                         statelist = st;
183                 }
184         } else {
185                 mddev_dev_t dv;
186                 for (dv=devlist ; dv; dv=dv->next) {
187                         mddev_ident_t mdlist = conf_get_ident(dv->devname);
188                         struct state *st = malloc(sizeof *st);
189                         if (st == NULL)
190                                 continue;
191                         st->devname = strdup(dv->devname);
192                         st->utime = 0;
193                         st->next = statelist;
194                         st->err = 0;
195                         st->devnum = INT_MAX;
196                         st->percent = -2;
197                         st->expected_spares = -1;
198                         st->spare_group = NULL;
199                         if (mdlist) {
200                                 st->expected_spares = mdlist->spare_disks;
201                                 if (mdlist->spare_group)
202                                         st->spare_group = strdup(mdlist->spare_group);
203                         }
204                         statelist = st;
205                 }
206         }
207
208
209         while (! finished) {
210                 int new_found = 0;
211                 struct state *st;
212
213                 if (mdstat)
214                         free_mdstat(mdstat);
215                 mdstat = mdstat_read(oneshot?0:1, 0);
216
217                 for (st=statelist; st; st=st->next) {
218                         struct { int state, major, minor; } info[MaxDisks];
219                         mdu_array_info_t array;
220                         struct mdstat_ent *mse = NULL, *mse2;
221                         char *dev = st->devname;
222                         int fd;
223                         unsigned int i;
224
225                         if (test)
226                                 alert("TestMessage", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
227                         fd = open(dev, O_RDONLY);
228                         if (fd < 0) {
229                                 if (!st->err)
230                                         alert("DeviceDisappeared", dev, NULL,
231                                               mailaddr, mailfrom, alert_cmd, dosyslog);
232 /*                                      fprintf(stderr, Name ": cannot open %s: %s\n",
233                                                 dev, strerror(errno));
234 */                              st->err=1;
235                                 continue;
236                         }
237                         fcntl(fd, F_SETFD, FD_CLOEXEC);
238                         if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
239                                 if (!st->err)
240                                         alert("DeviceDisappeared", dev, NULL,
241                                               mailaddr, mailfrom, alert_cmd, dosyslog);
242 /*                                      fprintf(stderr, Name ": cannot get array info for %s: %s\n",
243                                                 dev, strerror(errno));
244 */                              st->err=1;
245                                 close(fd);
246                                 continue;
247                         }
248                         /* It's much easier to list what array levels can't
249                          * have a device disappear than all of them that can
250                          */
251                         if (array.level == 0 || array.level == -1) {
252                                 if (!st->err)
253                                         alert("DeviceDisappeared", dev, "Wrong-Level",
254                                               mailaddr, mailfrom, alert_cmd, dosyslog);
255                                 st->err = 1;
256                                 close(fd);
257                                 continue;
258                         }
259                         if (st->devnum == INT_MAX) {
260                                 struct stat stb;
261                                 if (fstat(fd, &stb) == 0 &&
262                                     (S_IFMT&stb.st_mode)==S_IFBLK) {
263                                         if (major(stb.st_rdev) == MD_MAJOR)
264                                                 st->devnum = minor(stb.st_rdev);
265                                         else
266                                                 st->devnum = -1- (minor(stb.st_rdev)>>6);
267                                 }
268                         }
269
270                         for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
271                                 if (mse2->devnum == st->devnum) {
272                                         mse2->devnum = INT_MAX; /* flag it as "used" */
273                                         mse = mse2;
274                                 }
275
276                         if (st->utime == array.utime &&
277                             st->failed == array.failed_disks &&
278                             st->working == array.working_disks &&
279                             st->spare == array.spare_disks &&
280                             (mse == NULL  || (
281                                     mse->percent == st->percent
282                                     ))) {
283                                 close(fd);
284                                 st->err = 0;
285                                 continue;
286                         }
287                         if (st->utime == 0 && /* new array */
288                             mse &&      /* is in /proc/mdstat */
289                             mse->pattern && strchr(mse->pattern, '_') /* degraded */
290                                 )
291                                 alert("DegradedArray", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
292
293                         if (st->utime == 0 && /* new array */
294                             st->expected_spares > 0 &&
295                             array.spare_disks < st->expected_spares)
296                                 alert("SparesMissing", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
297                         if (mse &&
298                             st->percent == -1 &&
299                             mse->percent >= 0)
300                                 alert("RebuildStarted", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
301                         if (mse &&
302                             st->percent >= 0 &&
303                             mse->percent >= 0 &&
304                             (mse->percent / 20) > (st->percent / 20))
305                                 alert(percentalerts[mse->percent/20],
306                                       dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
307
308                         if (mse &&
309                             mse->percent == -1 &&
310                             st->percent >= 0) {
311                                 /* Rebuild/sync/whatever just finished.
312                                  * If there is a number in /mismatch_cnt,
313                                  * we should report that.
314                                  */
315                                 struct mdinfo *sra =
316                                        sysfs_read(-1, st->devnum, GET_MISMATCH);
317                                 if (sra && sra->mismatch_cnt > 0) {
318                                         char cnt[40];
319                                         sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
320                                         alert("RebuildFinished", dev, cnt, mailaddr, mailfrom, alert_cmd, dosyslog);
321                                 } else
322                                         alert("RebuildFinished", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
323                                 if (sra)
324                                         free(sra);
325                         }
326
327                         if (mse)
328                                 st->percent = mse->percent;
329
330
331                         for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
332                              i++) {
333                                 mdu_disk_info_t disc;
334                                 disc.number = i;
335                                 if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
336                                         info[i].state = disc.state;
337                                         info[i].major = disc.major;
338                                         info[i].minor = disc.minor;
339                                 } else
340                                         info[i].major = info[i].minor = 0;
341                         }
342                         close(fd);
343
344                         for (i=0; i<MaxDisks; i++) {
345                                 mdu_disk_info_t disc = {0};
346                                 int newstate=0;
347                                 int change;
348                                 char *dv = NULL;
349                                 disc.number = i;
350                                 if (i > array.raid_disks + array.nr_disks) {
351                                         newstate = 0;
352                                         disc.major = disc.minor = 0;
353                                 } else if (info[i].major || info[i].minor) {
354                                         newstate = info[i].state;
355                                         dv = map_dev(info[i].major, info[i].minor, 1);
356                                         disc.state = newstate;
357                                         disc.major = info[i].major;
358                                         disc.minor = info[i].minor;
359                                 } else if (mse &&  mse->pattern && i < strlen(mse->pattern)) {
360                                         switch(mse->pattern[i]) {
361                                         case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
362                                         case '_': newstate = 0; break;
363                                         }
364                                         disc.major = disc.minor = 0;
365                                 }
366                                 if (dv == NULL && st->devid[i])
367                                         dv = map_dev(major(st->devid[i]),
368                                                      minor(st->devid[i]), 1);
369                                 change = newstate ^ st->devstate[i];
370                                 if (st->utime && change && !st->err) {
371                                         if (i < (unsigned)array.raid_disks &&
372                                             (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
373                                              ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
374                                              ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
375                                                 )
376                                                 alert("Fail", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
377                                         else if (i >= (unsigned)array.raid_disks &&
378                                                  (disc.major || disc.minor) &&
379                                                  st->devid[i] == makedev(disc.major, disc.minor) &&
380                                                  ((newstate&change)&(1<<MD_DISK_FAULTY))
381                                                 )
382                                                 alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
383                                         else if (i < (unsigned)array.raid_disks &&
384                                                  (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
385                                                   ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
386                                                   ((newstate&change)&(1<<MD_DISK_SYNC)))
387                                                 )
388                                                 alert("SpareActive", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
389                                 }
390                                 st->devstate[i] = newstate;
391                                 st->devid[i] = makedev(disc.major, disc.minor);
392                         }
393                         st->active = array.active_disks;
394                         st->working = array.working_disks;
395                         st->spare = array.spare_disks;
396                         st->failed = array.failed_disks;
397                         st->utime = array.utime;
398                         st->raid = array.raid_disks;
399                         st->err = 0;
400                 }
401                 /* now check if there are any new devices found in mdstat */
402                 if (scan) {
403                         struct mdstat_ent *mse;
404                         for (mse=mdstat; mse; mse=mse->next)
405                                 if (mse->devnum != INT_MAX &&
406                                     (strcmp(mse->level, "raid0")!=0 &&
407                                      strcmp(mse->level, "linear")!=0)
408                                         ) {
409                                         struct state *st = malloc(sizeof *st);
410                                         mdu_array_info_t array;
411                                         int fd;
412                                         if (st == NULL)
413                                                 continue;
414                                         st->devname = strdup(get_md_name(mse->devnum));
415                                         if ((fd = open(st->devname, O_RDONLY)) < 0 ||
416                                             ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
417                                                 /* no such array */
418                                                 if (fd >=0) close(fd);
419                                                 put_md_name(st->devname);
420                                                 free(st->devname);
421                                                 free(st);
422                                                 continue;
423                                         }
424                                         close(fd);
425                                         st->utime = 0;
426                                         st->next = statelist;
427                                         st->err = 1;
428                                         st->devnum = mse->devnum;
429                                         st->percent = -2;
430                                         st->spare_group = NULL;
431                                         st->expected_spares = -1;
432                                         statelist = st;
433                                         alert("NewArray", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
434                                         new_found = 1;
435                                 }
436                 }
437                 /* If an array has active < raid && spare == 0 && spare_group != NULL
438                  * Look for another array with spare > 0 and active == raid and same spare_group
439                  *  if found, choose a device and hotremove/hotadd
440                  */
441                 for (st = statelist; st; st=st->next)
442                         if (st->active < st->raid &&
443                             st->spare == 0 &&
444                             st->spare_group != NULL) {
445                                 struct state *st2;
446                                 for (st2=statelist ; st2 ; st2=st2->next)
447                                         if (st2 != st &&
448                                             st2->spare > 0 &&
449                                             st2->active == st2->raid &&
450                                             st2->spare_group != NULL &&
451                                             strcmp(st->spare_group, st2->spare_group) == 0) {
452                                                 /* try to remove and add */
453                                                 int fd1 = open(st->devname, O_RDONLY);
454                                                 int fd2 = open(st2->devname, O_RDONLY);
455                                                 int dev = -1;
456                                                 int d;
457                                                 if (fd1 < 0 || fd2 < 0) {
458                                                         if (fd1>=0) close(fd1);
459                                                         if (fd2>=0) close(fd2);
460                                                         continue;
461                                                 }
462                                                 for (d=st2->raid; d < MaxDisks; d++) {
463                                                         if (st2->devid[d] > 0 &&
464                                                             st2->devstate[d] == 0) {
465                                                                 dev = st2->devid[d];
466                                                                 break;
467                                                         }
468                                                 }
469                                                 if (dev > 0) {
470                                                         if (ioctl(fd2, HOT_REMOVE_DISK,
471                                                                   (unsigned long)dev) == 0) {
472                                                                 if (ioctl(fd1, HOT_ADD_DISK,
473                                                                           (unsigned long)dev) == 0) {
474                                                                         alert("MoveSpare", st->devname, st2->devname, mailaddr, mailfrom, alert_cmd, dosyslog);
475                                                                         close(fd1);
476                                                                         close(fd2);
477                                                                         break;
478                                                                 }
479                                                                 else ioctl(fd2, HOT_ADD_DISK, (unsigned long) dev);
480                                                         }
481                                                 }
482                                                 close(fd1);
483                                                 close(fd2);
484                                         }
485                         }
486                 if (!new_found) {
487                         if (oneshot)
488                                 break;
489                         else
490                                 mdstat_wait(period);
491                 }
492                 test = 0;
493         }
494         if (pidfile)
495                 unlink(pidfile);
496         return 0;
497 }
498
499
500 static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom, char *cmd,
501                   int dosyslog)
502 {
503         int priority;
504
505         if (!cmd && !mailaddr) {
506                 time_t now = time(0);
507
508                 printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
509         }
510         if (cmd) {
511                 int pid = fork();
512                 switch(pid) {
513                 default:
514                         waitpid(pid, NULL, 0);
515                         break;
516                 case -1:
517                         break;
518                 case 0:
519                         execl(cmd, cmd, event, dev, disc, NULL);
520                         exit(2);
521                 }
522         }
523         if (mailaddr &&
524             (strncmp(event, "Fail", 4)==0 ||
525              strncmp(event, "Test", 4)==0 ||
526              strncmp(event, "Spares", 6)==0 ||
527              strncmp(event, "Degrade", 7)==0)) {
528                 FILE *mp = popen(Sendmail, "w");
529                 if (mp) {
530                         FILE *mdstat;
531                         char hname[256];
532                         gethostname(hname, sizeof(hname));
533                         signal(SIGPIPE, SIG_IGN);
534                         if (mailfrom)
535                                 fprintf(mp, "From: %s\n", mailfrom);
536                         else
537                                 fprintf(mp, "From: " Name " monitoring <root>\n");
538                         fprintf(mp, "To: %s\n", mailaddr);
539                         fprintf(mp, "Subject: %s event on %s:%s\n\n", event, dev, hname);
540
541                         fprintf(mp, "This is an automatically generated mail message from " Name "\n");
542                         fprintf(mp, "running on %s\n\n", hname);
543
544                         fprintf(mp, "A %s event had been detected on md device %s.\n\n", event, dev);
545
546                         if (disc && disc[0] != ' ')
547                                 fprintf(mp, "It could be related to component device %s.\n\n", disc);
548                         if (disc && disc[0] == ' ')
549                                 fprintf(mp, "Extra information:%s.\n\n", disc);
550
551                         fprintf(mp, "Faithfully yours, etc.\n");
552
553                         mdstat = fopen("/proc/mdstat", "r");
554                         if (mdstat) {
555                                 char buf[8192];
556                                 int n;
557                                 fprintf(mp, "\nP.S. The /proc/mdstat file currently contains the following:\n\n");
558                                 while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
559                                         n=fwrite(buf, 1, n, mp); /* yes, i don't care about the result */
560                                 fclose(mdstat);
561                         }
562                         fclose(mp);
563                 }
564
565         }
566
567         /* log the event to syslog maybe */
568         if (dosyslog) {
569                 /* Log at a different severity depending on the event.
570                  *
571                  * These are the critical events:  */
572                 if (strncmp(event, "Fail", 4)==0 ||
573                     strncmp(event, "Degrade", 7)==0 ||
574                     strncmp(event, "DeviceDisappeared", 17)==0)
575                         priority = LOG_CRIT;
576                 /* Good to know about, but are not failures: */
577                 else if (strncmp(event, "Rebuild", 7)==0 ||
578                          strncmp(event, "MoveSpare", 9)==0 ||
579                          strncmp(event, "Spares", 6) != 0)
580                         priority = LOG_WARNING;
581                 /* Everything else: */
582                 else
583                         priority = LOG_INFO;
584
585                 if (disc)
586                         syslog(priority, "%s event detected on md device %s, component device %s", event, dev, disc);
587                 else
588                         syslog(priority, "%s event detected on md device %s", event, dev);
589         }
590 }
591
592 /* Not really Monitor but ... */
593 int Wait(char *dev)
594 {
595         struct stat stb;
596         int devnum;
597         int rv = 1;
598
599         if (stat(dev, &stb) != 0) {
600                 fprintf(stderr, Name ": Cannot find %s: %s\n", dev,
601                         strerror(errno));
602                 return 2;
603         }
604         if (major(stb.st_rdev) == MD_MAJOR)
605                 devnum = minor(stb.st_rdev);
606         else
607                 devnum = -1-(minor(stb.st_rdev)/64);
608
609         while(1) {
610                 struct mdstat_ent *ms = mdstat_read(1, 0);
611                 struct mdstat_ent *e;
612
613                 for (e=ms ; e; e=e->next)
614                         if (e->devnum == devnum)
615                                 break;
616
617                 if (!e || e->percent < 0) {
618                         free_mdstat(ms);
619                         return rv;
620                 }
621                 free(ms);
622                 rv = 0;
623                 mdstat_wait(5);
624         }
625 }