From df881f757b3d61e5dfc1a2b23436ab1cf5247157 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 22 May 2014 16:00:39 +1000 Subject: [PATCH] mdcheck: new script to help with regular checks of md arrays. This script allows arrays to be 'checked' for a limited amount of time on a regular basis. For example, running mdcheck --duration 6hours early every Sunday morning and mdcheck --continue 6hours ever other morning will check all arrays every week, but if that take more than 6 hours, will won't run into the day, but will be continued the next morning, and the next ... etc. Signed-off-by: NeilBrown --- misc/mdcheck | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 misc/mdcheck diff --git a/misc/mdcheck b/misc/mdcheck new file mode 100644 index 00000000..60d8501f --- /dev/null +++ b/misc/mdcheck @@ -0,0 +1,158 @@ +#!/bin/bash + +# Copyright (C) 2014 Neil Brown +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Neil Brown +# Email: + +# This script should be run periodically to automatically +# perform a 'check' on any md arrays. +# +# It supports a 'time budget' such that any incomplete 'check' +# will be checkpointed when that time has expired. +# A subsequent invocation can allow the 'check' to continue. +# +# Options are: +# --continue Don't start new checks, only continue old ones. +# --duration This is passed to "date --date=+$duration" to find out +# when to finish +# +# To support '--continue', arrays are identified by UUID and the 'sync_completed' +# value is stored in /var/lib/mdcheck/$UUID + +# convert a /dev/md name into /sys/.../md equivalent +sysname() { + set `ls -lLd $1` + maj=${5%,} + min=$6 + readlink -f /sys/dev/block/$maj:$min +} + +args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@") +rv=$? +if [ $rv -ne 0 ]; then exit $rv; fi + +eval set -- $args + +cont= +endtime= +while [ " $1" != " --" ] +do + case $1 in + --help ) + echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]' + echo >&2 ' time-offset must be understood by "date --date"' + exit 0 + ;; + --continue ) cont=yes ;; + --duration ) shift; dur=$1 + endtime=$(date --date "+$dur" "+%s") + ;; + esac + shift +done +shift + +# We need a temp file occasionally... +tmp=/var/lib/mdcheck/.md-check-$$ +trap 'rm -f "$tmp"' 0 + + +# firstly, clean out really old state files +mkdir -p /var/lib/mdcheck +find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \; + +# Now look at each md device. +cnt=0 +for dev in /dev/md?* +do + sys=`sysname $dev` + if [ ! -f "$sys/md/sync_action" ] + then # cannot check this array + continue + fi + if [ "`cat $sys/md/sync_action`" != 'idle' ] + then # This array is busy + continue + fi + + mdadm --detail --export "$dev" > $tmp || continue + source $tmp + fl="/var/lib/mdcheck/MD_UUID_$MD_UUID" + if [ -z "$cont" ] + then + start=0 + elif [ -z "$MD_UUID" -o ! -f "$fl" ] + then + # Nothing to continue here + continue + else + start=`cat "$fl"` + fi + + cnt=$[cnt+1] + eval MD_${cnt}_fl=\$fl + eval MD_${cnt}_sys=\$sys + echo $start > $fl + echo $start > $sys/md/sync_min + echo check > $sys/md/sync_action +done + +if [ -z "$endtime" ] +then + exit 0 +fi + +while [ `date +%s` -lt $endtime ] +do + any= + for i in `eval echo {1..$cnt}` + do + eval fl=\$MD_${i}_fl + eval sys=\$MD_${i}_sys + + if [ -z "$fl" ]; then continue; fi + + if [ "`cat $sys/md/sync_action`" != 'check' ] + then + eval MD_${i}_fl= + rm -f $fl + continue; + fi + read a rest < $sys/md/sync_completed + echo $a > $fl + any=yes + done + if [ -z "$any" ]; then exit 0; fi + sleep 120 +done + +# We've waited, and there are still checks running. +# Time to stop them. +for i in `eval echo {1..$cnt}` +do + eval fl=\$MD_${i}_fl + eval sys=\$MD_${i}_sys + + if [ -z "$fl" ]; then continue; fi + + if [ "`cat $sys/md/sync_action`" != 'check' ] + then + eval MD_${i}_fl= + rm -f $fl + continue; + fi + echo idle > $sys/md/sync_action + cat $sys/md/sync_min > $fl +done -- 2.39.2