git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Grow: another attempt to fix stop-during-reshape race.
author: NeilBrown <neilb@suse.de>
Mon, 25 May 2015 06:33:45 +0000 (16:33 +1000)
committer: NeilBrown <neilb@suse.de>
Mon, 25 May 2015 06:33:45 +0000 (16:33 +1000)
When the array is stopped during a critical section, we sometimes
erase the backup, which is bad.
This happens when 'completed' is zero.
This can happen easily when 'stop' freezes reshape.

So try to be more careful and check 'reshape_position'.

Signed-off-by: NeilBrown <neilb@suse.de>
Grow.c

diff --git a/Grow.c b/Grow.c
index f2cf46a64247540e26ee6a05f8170bad129a9f56..a20ff3e70142b7edc141190909443d81de5099db 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -3858,27 +3858,30 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
        }
        /* Some kernels reset 'sync_completed' to zero,
         * we need to have real point we are in md.
-        * But only if array is actually still reshaping,
-        * not stopped.
+        * So in that case, read 'reshape_position' from sysfs.
         */
        if (completed == 0) {
+               unsigned long long reshapep;
                char action[20];
                if (sysfs_get_str(info, NULL, "sync_action",
                                  action, 20) > 0 &&
-                   strncmp(action, "idle", 4) == 0)
-                       completed = max_progress;
-       }
-
-       /* some kernels can give an incorrectly high 'completed' number */
-       completed /= (info->new_chunk/512);
-       completed *= (info->new_chunk/512);
-       /* Convert 'completed' back in to a 'progress' number */
-       completed *= reshape->after.data_disks;
-       if (!advancing) {
-               completed = info->component_size * reshape->after.data_disks
-                       - completed;
+                   strncmp(action, "idle", 4) == 0 &&
+                   sysfs_get_ll(info, NULL,
+                                "reshape_position", &reshapep) == 0)
+                       *reshape_completed = reshapep;
+       } else {
+               /* some kernels can give an incorrectly high
+                * 'completed' number, so round down */
+               completed /= (info->new_chunk/512);
+               completed *= (info->new_chunk/512);
+               /* Convert 'completed' back in to a 'progress' number */
+               completed *= reshape->after.data_disks;
+               if (!advancing)
+                       completed = (info->component_size
+                                    * reshape->after.data_disks
+                                    - completed);
+               *reshape_completed = completed;
        }
-       *reshape_completed = completed;
 
        close(fd);
 
@@ -3898,7 +3901,6 @@ check_progress:
         * it was just a device failure that leaves us degraded but
         * functioning.
         */
-       strcpy(buf, "hi");
        if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
            || strncmp(buf, "none", 4) != 0) {
                /* The abort might only be temporary.  Wait up to 10