git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - monitor.c
Factor out test for subarray version string.
[thirdparty/mdadm.git] / monitor.c
index 46d5e0a00380672285a2667c140e8324cc545721..900cba3cf5404abc996c666609e475bd3c018062 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -48,7 +48,7 @@ static int read_attr(char *buf, int len, int fd)
 }
 
 
-static int get_resync_start(struct active_array *a)
+int get_resync_start(struct active_array *a)
 {
        char buf[30];
        int n;
@@ -220,6 +220,7 @@ static int read_and_act(struct active_array *a)
        if (a->curr_state <= inactive &&
            a->prev_state > inactive) {
                /* array has been stopped */
+               get_resync_start(a);
                a->container->ss->set_array_state(a, 1);
                a->next_state = clear;
                deactivate = 1;
@@ -242,16 +243,14 @@ static int read_and_act(struct active_array *a)
                 * readonly ???
                 */
                get_resync_start(a);
-//             printf("Found a readonly array at %llu\n", a->resync_start);
-               if (a->resync_start == ~0ULL)
+               if (a->container->ss->set_array_state(a, 2))
                        a->next_state = read_auto; /* array is clean */
-               else {
-                       a->container->ss->set_array_state(a, 0);
-                       a->next_state = active;
-               }
+               else
+                       a->next_state = active; /* Now active for recovery etc */
        }
 
-       if (a->curr_action == idle &&
+       if (!deactivate &&
+           a->curr_action == idle &&
            a->prev_action == resync) {
                /* A resync has finished.  The endpoint is recorded in
                 * 'sync_start'.  We don't update the metadata
@@ -263,8 +262,12 @@ static int read_and_act(struct active_array *a)
                check_degraded = 1;
        }
 
-       if (a->curr_action == idle &&
+       if (!deactivate &&
+           a->curr_action == idle &&
            a->prev_action == recover) {
+               /* A recovery has finished.  Some disks may be in sync now,
+                * and the array may no longer be degraded
+                */
                for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                        a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                   mdi->curr_state);
@@ -273,12 +276,25 @@ static int read_and_act(struct active_array *a)
                }
        }
 
+       /* Check for failures and if found:
+        * 1/ Record the failure in the metadata and unblock the device.
+        *    FIXME update the kernel to stop notifying on failed drives when
+        *    the array is readonly and we have cleared 'blocked'
+        * 2/ Try to remove the device if the array is writable, or can be
+        *    made writable.
+        */
        for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                if (mdi->curr_state & DS_FAULTY) {
                        a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                   mdi->curr_state);
                        check_degraded = 1;
-                       mdi->next_state = DS_REMOVE;
+                       mdi->next_state |= DS_UNBLOCK;
+                       if (a->curr_state == read_auto) {
+                               a->container->ss->set_array_state(a, 0);
+                               a->next_state = active;
+                       }
+                       if (a->curr_state > readonly)
+                               mdi->next_state |= DS_REMOVE;
                }
        }
 
@@ -295,15 +311,18 @@ static int read_and_act(struct active_array *a)
                dprintf(" action:%s", array_states[a->next_state]);
        }
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-               if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+               if (mdi->next_state & DS_UNBLOCK) {
+                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
+                       write_attr("-blocked", mdi->state_fd);
+               }
+
+               if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
                        int remove_result;
 
-                       write_attr("-blocked", mdi->state_fd);
                        /* the kernel may not be able to immediately remove the
                         * disk, we can simply wait until the next event to try
                         * again.
                         */
-                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
                        remove_result = write_attr("remove", mdi->state_fd);
                        if (remove_result > 0) {
                                dprintf(" %d:removed", mdi->disk.raid_disk);
@@ -399,6 +418,8 @@ static void dprint_wake_reasons(fd_set *fds)
 }
 #endif
 
+int monitor_loop_cnt;
+
 static int wait_and_act(struct supertype *container, int nowait)
 {
        fd_set rfds;
@@ -456,7 +477,9 @@ static int wait_and_act(struct supertype *container, int nowait)
                sigset_t set;
                sigprocmask(SIG_UNBLOCK, NULL, &set);
                sigdelset(&set, SIGUSR1);
+               monitor_loop_cnt |= 1;
                rv = pselect(maxfd+1, &rfds, NULL, NULL, NULL, &set);
+               monitor_loop_cnt += 1;
                if (rv == -1 && errno == EINTR)
                        rv = 0;
                #ifdef DEBUG