]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Monitor.c
Monitor: set err on arrays not in mdstat
[thirdparty/mdadm.git] / Monitor.c
index af486d709c63d2a96d23803c47c3e7078ab71143..c4256815f74c3c2b49ef08f4179bf142575bf526 100644 (file)
--- a/Monitor.c
+++ b/Monitor.c
 static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom,
                  char *cmd, int dosyslog);
 
-static char *percentalerts[] = {
-       "RebuildStarted",
-       "Rebuild20",
-       "Rebuild40",
-       "Rebuild60",
-       "Rebuild80",
-};
-
 /* The largest number of disks current arrays can manage is 384
  * This really should be dynamic, but that will have to wait
  * At least it isn't MD_SB_DISKS.
  */
 #define MaxDisks 384
-int Monitor(mddev_dev_t devlist,
+int Monitor(struct mddev_dev *devlist,
            char *mailaddr, char *alert_cmd,
            int period, int daemonise, int scan, int oneshot,
-           int dosyslog, int test, char* pidfile)
+           int dosyslog, int test, char* pidfile, int increments)
 {
        /*
         * Every few seconds, scan every md device looking for changes
@@ -77,8 +69,8 @@ int Monitor(mddev_dev_t devlist,
         *      An active device had a reverse transition
         *    RebuildStarted
         *      percent went from -1 to +ve
-        *    Rebuild20 Rebuild40 Rebuild60 Rebuild80
-        *      percent went from below to not-below that number
+        *    RebuildNN
+        *      percent went from below to not-below NN%
         *    DeviceDisappeared
         *      Couldn't access a device which was previously visible
         *
@@ -102,7 +94,7 @@ int Monitor(mddev_dev_t devlist,
                int active, working, failed, spare, raid;
                int expected_spares;
                int devstate[MaxDisks];
-               int devid[MaxDisks];
+               unsigned devid[MaxDisks];
                int percent;
                struct state *next;
        } *statelist = NULL;
@@ -158,7 +150,7 @@ int Monitor(mddev_dev_t devlist,
        }
 
        if (devlist == NULL) {
-               mddev_ident_t mdlist = conf_get_ident(NULL);
+               struct mddev_ident *mdlist = conf_get_ident(NULL);
                for (; mdlist; mdlist=mdlist->next) {
                        struct state *st;
                        if (mdlist->devname == NULL)
@@ -188,9 +180,9 @@ int Monitor(mddev_dev_t devlist,
                        statelist = st;
                }
        } else {
-               mddev_dev_t dv;
+               struct mddev_dev *dv;
                for (dv=devlist ; dv; dv=dv->next) {
-                       mddev_ident_t mdlist = conf_get_ident(dv->devname);
+                       struct mddev_ident *mdlist = conf_get_ident(dv->devname);
                        struct state *st = malloc(sizeof *st);
                        if (st == NULL)
                                continue;
@@ -226,7 +218,7 @@ int Monitor(mddev_dev_t devlist,
                        struct mdstat_ent *mse = NULL, *mse2;
                        char *dev = st->devname;
                        int fd;
-                       unsigned int i;
+                       int i;
 
                        if (test)
                                alert("TestMessage", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
@@ -279,6 +271,14 @@ int Monitor(mddev_dev_t devlist,
                                        mse = mse2;
                                }
 
+                       if (!mse) {
+                               /* Not found in /proc/mdstat: either a duplicated
+                                * array in statelist, or the array was re-created
+                                * after mdstat was read. */
+                               st->err = 1;
+                               close(fd);
+                               continue;
+                       }
+                       /* this array is in /proc/mdstat */
                        if (array.utime == 0)
                                /* external arrays don't update utime */
                                array.utime = time(0);
@@ -295,7 +295,6 @@ int Monitor(mddev_dev_t devlist,
                                continue;
                        }
                        if (st->utime == 0 && /* new array */
-                           mse &&      /* is in /proc/mdstat */
                            mse->pattern && strchr(mse->pattern, '_') /* degraded */
                                )
                                alert("DegradedArray", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
@@ -304,19 +303,24 @@ int Monitor(mddev_dev_t devlist,
                            st->expected_spares > 0 &&
                            array.spare_disks < st->expected_spares)
                                alert("SparesMissing", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
-                       if (mse &&
-                           st->percent == -1 &&
+                       if (st->percent == -1 &&
                            mse->percent >= 0)
                                alert("RebuildStarted", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
-                       if (mse &&
-                           st->percent >= 0 &&
+                       if (st->percent >= 0 &&
                            mse->percent >= 0 &&
-                           (mse->percent / 20) > (st->percent / 20))
-                               alert(percentalerts[mse->percent/20],
+                           (mse->percent / increments) > (st->percent / increments)) {
+                               char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+
+                               if((mse->percent / increments) == 0)
+                                       snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
+                               else
+                                       snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
+
+                               alert(percentalert,
                                      dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
+                       }
 
-                       if (mse &&
-                           mse->percent == -1 &&
+                       if (mse->percent == -1 &&
                            st->percent >= 0) {
                                /* Rebuild/sync/whatever just finished.
                                 * If there is a number in /mismatch_cnt,
@@ -333,10 +337,7 @@ int Monitor(mddev_dev_t devlist,
                                if (sra)
                                        free(sra);
                        }
-
-                       if (mse)
-                               st->percent = mse->percent;
-
+                       st->percent = mse->percent;
 
                        for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
                             i++) {
@@ -352,7 +353,7 @@ int Monitor(mddev_dev_t devlist,
                        close(fd);
 
                        for (i=0; i<MaxDisks; i++) {
-                               mdu_disk_info_t disc = {0};
+                               mdu_disk_info_t disc = {0,0,0,0,0};
                                int newstate=0;
                                int change;
                                char *dv = NULL;
@@ -366,7 +367,7 @@ int Monitor(mddev_dev_t devlist,
                                        disc.state = newstate;
                                        disc.major = info[i].major;
                                        disc.minor = info[i].minor;
-                               } else if (mse &&  mse->pattern && i < strlen(mse->pattern)) {
+                               } else if (mse &&  mse->pattern && i < (int)strlen(mse->pattern)) {
                                        switch(mse->pattern[i]) {
                                        case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
                                        case '_': newstate = 0; break;
@@ -378,19 +379,20 @@ int Monitor(mddev_dev_t devlist,
                                                     minor(st->devid[i]), 1);
                                change = newstate ^ st->devstate[i];
                                if (st->utime && change && !st->err) {
-                                       if (i < (unsigned)array.raid_disks &&
+                                       if (i < array.raid_disks &&
                                            (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
                                             ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
                                             ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
                                                )
                                                alert("Fail", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
-                                       else if (i >= (unsigned)array.raid_disks &&
+                                       else if (i >= array.raid_disks &&
                                                 (disc.major || disc.minor) &&
                                                 st->devid[i] == makedev(disc.major, disc.minor) &&
                                                 ((newstate&change)&(1<<MD_DISK_FAULTY))
                                                )
                                                alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
-                                       else if (i < (unsigned)array.raid_disks &&
+                                       else if (i < array.raid_disks &&
+                                                ! (newstate & (1<<MD_DISK_REMOVED)) &&
                                                 (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
                                                  ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
                                                  ((newstate&change)&(1<<MD_DISK_SYNC)))
@@ -480,7 +482,7 @@ int Monitor(mddev_dev_t devlist,
                                                        }
                                                }
                                                if (dev > 0) {
-                                                       struct mddev_dev_s devlist;
+                                                       struct mddev_dev devlist;
                                                        char devname[20];
                                                        devlist.next = NULL;
                                                        devlist.used = 0;
@@ -490,15 +492,15 @@ int Monitor(mddev_dev_t devlist,
                                                        sprintf(devname, "%d:%d", major(dev), minor(dev));
 
                                                        devlist.disposition = 'r';
-                                                       if (Manage_subdevs(st2->devname, fd2, &devlist, -1) == 0) {
+                                                       if (Manage_subdevs(st2->devname, fd2, &devlist, -1, 0) == 0) {
                                                                devlist.disposition = 'a';
-                                                               if (Manage_subdevs(st->devname, fd1, &devlist, -1) == 0) {
+                                                               if (Manage_subdevs(st->devname, fd1, &devlist, -1, 0) == 0) {
                                                                        alert("MoveSpare", st->devname, st2->devname, mailaddr, mailfrom, alert_cmd, dosyslog);
                                                                        close(fd1);
                                                                        close(fd2);
                                                                        break;
                                                                }
-                                                               else Manage_subdevs(st2->devname, fd2, &devlist, -1);
+                                                               else Manage_subdevs(st2->devname, fd2, &devlist, -1, 0);
                                                        }
                                                }
                                                close(fd1);