]> git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
raid6check: Auto-repair mode
authorRobert Buchholz <rbu@goodpoint.de>
Mon, 10 Sep 2012 07:28:21 +0000 (17:28 +1000)
committerNeilBrown <neilb@suse.de>
Mon, 10 Sep 2012 07:28:21 +0000 (17:28 +1000)
When calling raid6check in regular scanning mode, specifying
"autorepair" as the last positional parameter will cause it
to automatically repair any single-slot failures it identifies.

Signed-off-by: NeilBrown <neilb@suse.de>
raid6check.c
tests/19raid6auto-repair [new file with mode: 0644]

index 4aeafad46058f5a393a485aba1841cb6e46ef8b7..e9a17a7caa037bb60a53594bb3a7aee81bd9cc7e 100644 (file)
@@ -281,6 +281,35 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
                        lseek64(source[failed_disk2], offsets[failed_disk2] + start * chunk_size, 0);
                        write(source[failed_disk2], stripes[failed_disk2], chunk_size);
 
+                       err = unlock_all_stripes(info, sig);
+                       if(err != 0)
+                               goto exitCheck;
+               } else if (disk >= 0 && repair == 2) {
+                       printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
+                       if (disk == diskQ) {
+                               qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
+                       } else {
+                               char *all_but_failed_blocks[data_disks];
+                               int failed_block_index = block_index_for_slot[disk];
+                               for (i=0; i < data_disks; i++)
+                                       if (failed_block_index == i)
+                                               all_but_failed_blocks[i] = stripes[diskP];
+                                       else
+                                               all_but_failed_blocks[i] = blocks[i];
+                               xor_blocks(stripes[disk],
+                                       all_but_failed_blocks, data_disks, chunk_size);
+                       }
+
+                       err = lock_stripe(info, start, chunk_size, data_disks, sig);
+                       if(err != 0) {
+                               if (err != 2)
+                                       unlock_all_stripes(info, sig);
+                               goto exitCheck;
+                       }
+
+                       lseek64(source[disk], offsets[disk] + start * chunk_size, 0);
+                       write(source[disk], stripes[disk], chunk_size);
+
                        err = unlock_all_stripes(info, sig);
                        if(err != 0)
                                goto exitCheck;
@@ -343,7 +372,7 @@ int main(int argc, char *argv[])
                prg++;
 
        if (argc < 4) {
-               fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg);
+               fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
                fprintf(stderr, "   or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
                exit_err = 1;
                goto exitHere;
@@ -441,6 +470,8 @@ int main(int argc, char *argv[])
        else {
                start = getnum(argv[2], &err);
                length = getnum(argv[3], &err);
+               if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
+                       repair = 2;
        }
 
        if (err) {
diff --git a/tests/19raid6auto-repair b/tests/19raid6auto-repair
new file mode 100644 (file)
index 0000000..6665458
--- /dev/null
@@ -0,0 +1,43 @@
+number_of_disks=5
+chunksize_in_kib=512
+chunksize_in_b=$[chunksize_in_kib*1024]
+array_data_size_in_kib=$[chunksize_in_kib*(number_of_disks-2)*number_of_disks]
+array_data_size_in_b=$[array_data_size_in_kib*1024]
+devs="$dev0 $dev1 $dev2 $dev3 $dev4"
+
+# default 32 sectors
+data_offset_in_kib=$[32/2]
+
+# make a raid6 from a file
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$array_data_size_in_kib
+mdadm -CR $md0 -l6 -n$number_of_disks -c $chunksize_in_kib $devs
+dd if=/tmp/RandFile of=$md0 bs=1024 count=$array_data_size_in_kib
+blockdev --flushbufs $md0; sync
+check wait
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo sanity cmp failed ; exit 2; }
+
+# wipe out 5 chunks on each device
+dd if=/dev/urandom of=$dev0 bs=1024 count=$[5*chunksize_in_kib] seek=$[data_offset_in_kib+chunksize_in_kib*0]
+dd if=/dev/urandom of=$dev1 bs=1024 count=$[5*chunksize_in_kib] seek=$[data_offset_in_kib+chunksize_in_kib*5]
+dd if=/dev/urandom of=$dev2 bs=1024 count=$[5*chunksize_in_kib] seek=$[data_offset_in_kib+chunksize_in_kib*10]
+dd if=/dev/urandom of=$dev3 bs=1024 count=$[5*chunksize_in_kib] seek=$[data_offset_in_kib+chunksize_in_kib*15]
+dd if=/dev/urandom of=$dev4 bs=1024 count=$[5*chunksize_in_kib] seek=$[data_offset_in_kib+chunksize_in_kib*20]
+
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+
+$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" || { echo should detect errors; exit 2; }
+
+$dir/raid6check $md0 0 0 autorepair > /dev/null || { echo repair failed; exit 2; }
+blockdev --flushbufs $md0 $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+
+$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" && { echo errors detected; exit 2; }
+cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
+
+mdadm -S $md0
+udevadm settle
+blockdev --flushbufs $md0 $devs; sync
+echo 3 > /proc/sys/vm/drop_caches