git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - monitor.c
imsm: fix up compare_super_imsm() to match family_num for populated mpb's
[thirdparty/mdadm.git] / monitor.c
index 524411e3a290140e7021be47dec4b8b11b7761cf..45b5d5b5551bfa91e0ef80359ea088b471567aaf 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -48,7 +48,7 @@ static int read_attr(char *buf, int len, int fd)
 }
 
 
-static int get_resync_start(struct active_array *a)
+int get_resync_start(struct active_array *a)
 {
        char buf[30];
        int n;
@@ -220,6 +220,7 @@ static int read_and_act(struct active_array *a)
        if (a->curr_state <= inactive &&
            a->prev_state > inactive) {
                /* array has been stopped */
+               get_resync_start(a);
                a->container->ss->set_array_state(a, 1);
                a->next_state = clear;
                deactivate = 1;
@@ -237,21 +238,25 @@ static int read_and_act(struct active_array *a)
        }
 
        if (a->curr_state == readonly) {
-               /* Well, I'm ready to handle things, so
-                * read-auto is OK. FIXME what if we really want
-                * readonly ???
+               /* Well, I'm ready to handle things.  If readonly
+                * wasn't requested, transition to read-auto.
                 */
-               get_resync_start(a);
-               printf("Found a readonly array at %llu\n", a->resync_start);
-               if (a->resync_start == ~0ULL)
-                       a->next_state = read_auto; /* array is clean */
-               else {
-                       a->container->ss->set_array_state(a, 0);
-                       a->next_state = active;
+               char buf[64];
+               read_attr(buf, sizeof(buf), a->metadata_fd);
+               if (strncmp(buf, "external:-", 10) == 0) {
+                       /* explicit request for readonly array.  Leave it alone */
+                       ;
+               } else {
+                       get_resync_start(a);
+                       if (a->container->ss->set_array_state(a, 2))
+                               a->next_state = read_auto; /* array is clean */
+                       else
+                               a->next_state = active; /* Now active for recovery etc */
                }
        }
 
-       if (a->curr_action == idle &&
+       if (!deactivate &&
+           a->curr_action == idle &&
            a->prev_action == resync) {
                /* A resync has finished.  The endpoint is recorded in
                 * 'sync_start'.  We don't update the metadata
@@ -263,8 +268,12 @@ static int read_and_act(struct active_array *a)
                check_degraded = 1;
        }
 
-       if (a->curr_action == idle &&
+       if (!deactivate &&
+           a->curr_action == idle &&
            a->prev_action == recover) {
+               /* A recovery has finished.  Some disks may be in sync now,
+                * and the array may no longer be degraded
+                */
                for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                        a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                   mdi->curr_state);
@@ -273,12 +282,25 @@ static int read_and_act(struct active_array *a)
                }
        }
 
+       /* Check for failures and if found:
+        * 1/ Record the failure in the metadata and unblock the device.
+        *    FIXME update the kernel to stop notifying on failed drives when
+        *    the array is readonly and we have cleared 'blocked'
+        * 2/ Try to remove the device if the array is writable, or can be
+        *    made writable.
+        */
        for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                if (mdi->curr_state & DS_FAULTY) {
                        a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                   mdi->curr_state);
                        check_degraded = 1;
-                       mdi->next_state = DS_REMOVE;
+                       mdi->next_state |= DS_UNBLOCK;
+                       if (a->curr_state == read_auto) {
+                               a->container->ss->set_array_state(a, 0);
+                               a->next_state = active;
+                       }
+                       if (a->curr_state > readonly)
+                               mdi->next_state |= DS_REMOVE;
                }
        }
 
@@ -295,15 +317,18 @@ static int read_and_act(struct active_array *a)
                dprintf(" action:%s", array_states[a->next_state]);
        }
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-               if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+               if (mdi->next_state & DS_UNBLOCK) {
+                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
+                       write_attr("-blocked", mdi->state_fd);
+               }
+
+               if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
                        int remove_result;
 
-                       write_attr("-blocked", mdi->state_fd);
                        /* the kernel may not be able to immediately remove the
                         * disk, we can simply wait until the next event to try
                         * again.
                         */
-                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
                        remove_result = write_attr("remove", mdi->state_fd);
                        if (remove_result > 0) {
                                dprintf(" %d:removed", mdi->disk.raid_disk);
@@ -399,6 +424,8 @@ static void dprint_wake_reasons(fd_set *fds)
 }
 #endif
 
+int monitor_loop_cnt;
+
 static int wait_and_act(struct supertype *container, int nowait)
 {
        fd_set rfds;
@@ -445,9 +472,9 @@ static int wait_and_act(struct supertype *container, int nowait)
                if (fd >= 0 || errno != EBUSY) {
                        /* OK, we are safe to leave */
                        dprintf("no arrays to monitor... exiting\n");
+                       remove_pidfile(container->devname);
                        exit_now = 1;
                        signal_manager();
-                       remove_pidfile(container->devname);
                        exit(0);
                }
        }
@@ -456,8 +483,11 @@ static int wait_and_act(struct supertype *container, int nowait)
                sigset_t set;
                sigprocmask(SIG_UNBLOCK, NULL, &set);
                sigdelset(&set, SIGUSR1);
+               monitor_loop_cnt |= 1;
                rv = pselect(maxfd+1, &rfds, NULL, NULL, NULL, &set);
-
+               monitor_loop_cnt += 1;
+               if (rv == -1 && errno == EINTR)
+                       rv = 0;
                #ifdef DEBUG
                dprint_wake_reasons(&rfds);
                #endif