#!/bin/bash
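#
# Helper functions shared by the clustermd test scripts: cluster/ssh sanity
# checks, test-device discovery, DLM and environment checks, log collection
# and the per-feature 'check' assertions.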

check_ssh()
{
	NODE1="$(grep '^NODE1' $CLUSTER_CONF | cut -d'=' -f2)"
	NODE2="$(grep '^NODE2' $CLUSTER_CONF | cut -d'=' -f2)"
	[ -z "$NODE1" -o -z "$NODE2" ] && {
		echo "Please provide the node IPs (NODE1/NODE2) in $CLUSTER_CONF."
		exit 1
	}
	for ip in $NODE1 $NODE2
	do
		ssh -o NumberOfPasswordPrompts=0 $ip -l root "pwd" > /dev/null
		[ $? -ne 0 ] && {
			echo "Please set up passwordless (key-based) ssh access for root."
			exit 1
		}
	done
}

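# fetch_devlist: build the list of shared test devices, either from 'devlist='
# in $CLUSTER_CONF or by discovering disks of the configured iSCSI target;
# sbd disks are filtered out, at least 6 disks are required, and the devices
# are exported as $dev0, $dev1, ...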
fetch_devlist()
{
	ISCSI_ID="$(grep '^ISCSI_TARGET_ID' $CLUSTER_CONF | cut -d'=' -f2)"
	devlist="$(grep '^devlist' $CLUSTER_CONF | cut -d'=' -f2)"
	if [ ! -z "$ISCSI_ID" -a ! -z "$devlist" ]
	then
		echo "Config either ISCSI_TARGET_ID or devlist in $CLUSTER_CONF, not both."
		exit 1
	elif [ ! -z "$ISCSI_ID" -a -z "$devlist" ]
	then
		for ip in $NODE1 $NODE2
		do
			ssh $ip "ls /dev/disk/by-path/*$ISCSI_ID*" > /dev/null
			[ $? -ne 0 ] && {
				echo "$ip: No disks found in '$ISCSI_ID' connection."
				exit 1
			}
		done
		devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
	fi
	# sbd disks cannot be used for testing
	for i in ${devlist[@]}
	do
		sbd -d $i dump &> /dev/null
		[ $? -eq '0' ] && devlist=(${devlist[@]#$i})
	done
	for i in $(seq 0 $((${#devlist[@]} - 1)))
	do
		eval "dev$i=${devlist[$i]}"
	done
	[ "${#devlist[@]}" -lt 6 ] && {
		echo "Cluster-md testing requires at least 6 disks."
		exit 1
	}
}

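# check_dlm: configure the dlm (ocf:pacemaker:controld) resource if it does
# not exist yet, then verify dlm_controld is running on both nodes and that
# no cluster resource reports errors.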
check_dlm()
{
	if ! crm configure show | grep -q dlm
	then
		crm configure primitive dlm ocf:pacemaker:controld \
			op monitor interval=60 timeout=60 \
			meta target-role=Started &> /dev/null
		crm configure group base-group dlm
		crm configure clone base-clone base-group \
			meta interleave=true
	fi
	sleep 1
	for ip in $NODE1 $NODE2
	do
		ssh $ip "pgrep dlm_controld > /dev/null" || {
			echo "$ip: dlm_controld daemon is not running."
			exit 1
		}
	done
	crm_mon -r -n1 | grep -iq "fail\|not" && {
		echo "Please clear cluster-resource errors."
		exit 1
	}
}

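# check_env: verify the test prerequisites on both nodes: root user, required
# commands, an mdadm binary matching the source tree ('make install'), the
# needed kernel modules and no already-running RAID arrays; then fetch the
# device list, check DLM and create the log directory.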
check_env()
{
	user=$(id -un)
	[ "X$user" = "Xroot" ] || {
		echo "Testing can only be done as 'root'."
		exit 1
	}
	check_ssh
	commands=(mdadm iscsiadm bc modinfo dlm_controld
		  udevadm crm crm_mon lsblk pgrep sbd)
	mdadm_src_ver="$($mdadm -V 2>&1)"
	for ip in $NODE1 $NODE2
	do
		for cmd in ${commands[@]}
		do
			ssh $ip "which $cmd &> /dev/null" || {
				echo "$ip: $cmd, command not found!"
				exit 1
			}
		done
		mdadm_sbin_ver="$(ssh $ip "mdadm -V 2>&1")"
		if [ "$mdadm_src_ver" != "$mdadm_sbin_ver" ]
		then
			echo "$ip: please run 'make install' before testing."
			exit 1
		fi
		mods=(raid1 raid10 md_mod dlm md-cluster)
		for mod in ${mods[@]}
		do
			ssh $ip "modinfo $mod > /dev/null" || {
				echo "$ip: $mod, module doesn't exist."
				exit 1
			}
		done
		ssh $ip "lsblk -a | grep -iq raid"
		[ $? -eq 0 ] && {
			echo "$ip: Please stop all running RAID arrays before testing."
			exit 1
		}
		ssh $ip "modprobe md_mod"
	done
	fetch_devlist
	check_dlm
	[ -d $logdir ] || mkdir -p $logdir
}

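# stop_md: stop MD arrays on one node or on both nodes ("all"); for example,
# "stop_md all" stops every array on both nodes, while
# "stop_md $NODE1 /dev/md0" (example device name) stops a single array.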
# $1: node IP or "all", $2: optional md device (stop all arrays if omitted)
stop_md()
{
	if [ "$1" == "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
	then
		NODES=$1
	else
		die "$1: unknown parameter."
	fi
	if [ -z "$2" ]
	then
		for ip in ${NODES[@]}
		do
			ssh $ip mdadm -Ssq
		done
	else
		for ip in ${NODES[@]}
		do
			ssh $ip mdadm -S $2
		done
	fi
}

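# save_log: collect dmesg, /proc/mdstat and mdadm -D/-X output from both nodes
# into $logdir, then stop all arrays.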
# $1: optional status tag, indicating why the log is being saved
save_log()
{
	status=$1
	logfile="$status""$_basename".log

	cat $targetdir/stderr >> $targetdir/log
	cp $targetdir/log $logdir/$_basename.log

	for ip in $NODE1 $NODE2
	do
		echo "##$ip: saving dmesg." >> $logdir/$logfile
		ssh $ip "dmesg -c" >> $logdir/$logfile
		echo "##$ip: saving proc mdstat." >> $logdir/$logfile
		ssh $ip "cat /proc/mdstat" >> $logdir/$logfile
		array=($(ssh $ip "mdadm -Ds | cut -d' ' -f2"))

		if [ ! -z "$array" -a ${#array[@]} -ge 1 ]
		then
			echo "##$ip: mdadm -D ${array[@]}" >> $logdir/$logfile
			ssh $ip "mdadm -D ${array[@]}" >> $logdir/$logfile
			md_disks=($(ssh $ip "mdadm -DY ${array[@]} | grep '/dev/' | cut -d'=' -f2"))
			ssh $ip "grep -q bitmap /proc/mdstat"
			if [ $? -eq 0 ]
			then
				echo "##$ip: mdadm -X ${md_disks[@]}" >> $logdir/$logfile
				ssh $ip "mdadm -X ${md_disks[@]}" >> $logdir/$logfile
			fi
		else
			echo "##$ip: no array assembled!" >> $logdir/$logfile
		fi
	done
	[ "$1" == "fail" ] &&
		echo "See $logdir/$_basename.log and $logdir/$logfile for details"
	stop_md all
}

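# do_setup: validate the environment and allow unlimited core dumps.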
do_setup()
{
	check_env
	ulimit -c unlimited
}

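# cleanup: stop all arrays, clear dmesg on both nodes and zero the superblocks
# of the test devices.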
cleanup()
{
	check_ssh
	for ip in $NODE1 $NODE2
	do
		ssh $ip "mdadm -Ssq; dmesg -c > /dev/null"
	done
	mdadm --zero ${devlist[@]} &> /dev/null
}

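# check: assert a per-feature condition on one node or on both nodes ("all"),
# e.g. "check all recovery", "check all wait" or "check $NODE1 spares 1"
# (example arguments, matching the cases handled below).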
# $1: cluster node or "all", $2: feature/state to verify, $3: optional expected value
check()
{
	NODES=()
	if [ "$1" == "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
	then
		NODES=$1
	else
		die "$1: unknown parameter."
	fi
	case $2 in
	spares )
		for ip in ${NODES[@]}
		do
			spares=$(ssh $ip "tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)'")
			[ "$spares" -ne "$3" ] &&
				die "$ip: expected $3 spares, but found $spares"
		done
	;;
	raid* )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -sq '$2' /proc/mdstat" ||
				die "$ip: check '$2' failed."
		done
	;;
	PENDING | recovery | resync | reshape )
		cnt=5
		for ip in ${NODES[@]}
		do
			while ! ssh $ip "grep -sq '$2' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 0.2
					cnt=$((cnt - 1))
				else
					die "$ip: no '$2' happening!"
				fi
			done
		done
	;;
	wait )
		local cnt=60
		for ip in ${NODES[@]}
		do
			p=$(ssh $ip "cat /proc/sys/dev/raid/speed_limit_max")
			ssh $ip "echo 200000 > /proc/sys/dev/raid/speed_limit_max"
			while ssh $ip "grep -Esq '(resync|recovery|reshape|check|repair)' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 5
					cnt=$((cnt - 1))
				else
					die "$ip: check '$2' timed out after 300 seconds."
				fi
			done
			ssh $ip "echo $p > /proc/sys/dev/raid/speed_limit_max"
		done
	;;
	bitmap )
		for ip in ${NODES[@]}
		do
			echo $ip
			ssh $ip cat /proc/mdstat
			ssh $ip "grep -sq '$2' /proc/mdstat" ||
				die "$ip: no '$2' found in /proc/mdstat."
		done
	;;
	chunk )
		for ip in ${NODES[@]}
		do
			chunk_size=$(ssh $ip "awk -F',' '/chunk/{print \$2}' /proc/mdstat | awk -F'[a-z]' '{print \$1}'")
			[ "$chunk_size" -ne "$3" ] &&
				die "$ip: chunksize should be $3, but it's $chunk_size"
		done
	;;
	state )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Esq 'blocks.*\[$3\]\$' /proc/mdstat" ||
				die "$ip: no '$3' found in /proc/mdstat."
		done
	;;
	nosync )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Eq '(resync|recovery)' /proc/mdstat" &&
				die "$ip: resync or recovery is happening!"
		done
	;;
	readonly )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -sq 'read-only' /proc/mdstat" ||
				die "$ip: check '$2' failed!"
		done
	;;
	dmesg )
		for ip in ${NODES[@]}
		do
			ssh $ip "dmesg | grep -iq 'error\|call trace\|segfault'" &&
				die "$ip: check '$2' prints errors!"
		done
	;;
	* )
		die "unknown parameter $2"
	;;
	esac
}