]> git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
mdcheck: new script to help with regular checks of md arrays.
authorNeilBrown <neilb@suse.de>
Thu, 22 May 2014 06:00:39 +0000 (16:00 +1000)
committerNeilBrown <neilb@suse.de>
Thu, 22 May 2014 06:00:39 +0000 (16:00 +1000)
This script allows arrays to be 'checked' for a limited amount
of time on a regular basis.

For example, running

 mdcheck --duration 6hours

early every Sunday morning and

 mdcheck --continue --duration 6hours

every other morning will check all arrays every week, but if that takes
more than 6 hours, it won't run into the day, but will be continued
the next morning, and the next ... etc.

Signed-off-by: NeilBrown <neilb@suse.de>
misc/mdcheck [new file with mode: 0644]

diff --git a/misc/mdcheck b/misc/mdcheck
new file mode 100644 (file)
index 0000000..60d8501
--- /dev/null
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+# Copyright (C) 2014 Neil Brown <neilb@suse.de>
+#
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    Author: Neil Brown
+#    Email: <neilb@suse.de>
+
+# This script should be run periodically to automatically
+# perform a 'check' on any md arrays.
+#
+# It supports a 'time budget' such that any incomplete 'check'
+# will be checkpointed when that time has expired.
+# A subsequent invocation can allow the 'check' to continue.
+#
+# Options are:
+#   --continue    Don't start new checks, only continue old ones.
+#   --duration    This is passed to "date --date=+$duration" to find out
+#                when to finish
+#
+# To support '--continue', arrays are identified by UUID and the 'sync_completed'
+# value is stored  in /var/lib/mdcheck/$UUID
+
+# convert a /dev/md name into /sys/.../md equivalent
+sysname() {
+       set `ls -lLd $1`
+       maj=${5%,}
+       min=$6
+       readlink -f /sys/dev/block/$maj:$min
+}
+
+args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
+rv=$?
+if [ $rv -ne 0 ]; then exit $rv; fi
+
+eval set -- $args
+
+cont=
+endtime=
+while [ " $1" != " --" ]
+do
+    case $1 in
+       --help )
+               echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
+               echo >&2 '  time-offset must be understood by "date --date"'
+               exit 0
+               ;;
+       --continue ) cont=yes ;;
+       --duration ) shift; dur=$1
+               endtime=$(date --date "+$dur" "+%s")
+               ;;
+    esac
+    shift
+done
+shift
+
+# The state directory holds one MD_UUID_* checkpoint file per array as
+# well as our scratch file, so make sure it exists.
+mkdir -p /var/lib/mdcheck
+
+# Scratch file for "mdadm --detail --export" output; removed whenever the
+# script exits.
+tmp=/var/lib/mdcheck/.md-check-$$
+trap 'rm -f "$tmp"' EXIT
+
+# Drop checkpoint files that have not been modified for more than 180 days.
+find /var/lib/mdcheck -type f -name "MD_UUID*" -mtime +180 -exec rm {} \;
+
+# Now look at each md device.
+cnt=0
+for dev in /dev/md?*
+do
+       sys=`sysname $dev`
+       if [ ! -f "$sys/md/sync_action" ]
+       then # cannot check this array
+               continue
+       fi
+       if [ "`cat $sys/md/sync_action`" != 'idle' ]
+       then # This array is busy
+               continue
+       fi
+
+       mdadm --detail --export "$dev" > $tmp || continue
+       source $tmp
+       fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
+       if [ -z "$cont" ]
+       then
+               start=0
+       elif [ -z "$MD_UUID" -o ! -f "$fl" ]
+       then
+               # Nothing to continue here
+               continue
+       else
+               start=`cat "$fl"`
+       fi
+
+       cnt=$[cnt+1]
+       eval MD_${cnt}_fl=\$fl
+       eval MD_${cnt}_sys=\$sys
+       echo $start > $fl
+       echo $start > $sys/md/sync_min
+       echo check > $sys/md/sync_action
+done
+
+if [ -z "$endtime" ]
+then
+       exit 0
+fi
+
+while [ `date +%s` -lt $endtime ]
+do
+       any=
+       for i in `eval echo {1..$cnt}`
+       do
+               eval fl=\$MD_${i}_fl
+               eval sys=\$MD_${i}_sys
+
+               if [ -z "$fl" ]; then continue; fi
+
+               if [ "`cat $sys/md/sync_action`" != 'check' ]
+               then
+                       eval MD_${i}_fl=
+                       rm -f $fl
+                       continue;
+               fi
+               read a rest < $sys/md/sync_completed
+               echo $a > $fl
+               any=yes
+       done
+       if [ -z "$any" ]; then exit 0; fi
+       sleep 120
+done
+
+# We've waited, and there are still checks running.
+# Time to stop them, saving a checkpoint for each so that a later
+# "mdcheck --continue" can pick up where this run left off.
+#
+# An arithmetic loop is used because it runs zero times when cnt is 0,
+# whereas the brace expansion {1..0} would produce the indices "1 0".
+for ((i = 1; i <= cnt; i++))
+do
+       flvar=MD_${i}_fl
+       sysvar=MD_${i}_sys
+       fl=${!flvar}
+       sys=${!sysvar}
+
+       # An empty entry marks an array that was already dealt with.
+       if [ -z "$fl" ]; then continue; fi
+
+       if [ "$(cat "$sys/md/sync_action")" != 'check' ]
+       then
+               # The check is no longer running: no checkpoint is needed.
+               printf -v "$flvar" '%s' ''
+               rm -f "$fl"
+               continue
+       fi
+       echo idle > "$sys/md/sync_action"
+       # NOTE(review): this relies on the kernel having moved sync_min up
+       # to the point the interrupted check reached - confirm against the
+       # md sysfs documentation.
+       cat "$sys/md/sync_min" > "$fl"
+done