]>
Commit | Line | Data |
---|---|---|
beb71de0 ZL |
1 | #!/bin/bash |
2 | ||
beb71de0 ZL |
# Verify passwordless root ssh access to both cluster nodes.
# Reads NODE1/NODE2 (globals, used by every other helper) from
# $CLUSTER_CONF; exits 1 if either is missing or unreachable.
check_ssh()
{
	NODE1="$(grep '^NODE1' "$CLUSTER_CONF" | cut -d'=' -f2)"
	NODE2="$(grep '^NODE2' "$CLUSTER_CONF" | cut -d'=' -f2)"
	# Both node addresses must be configured before anything can run.
	[ -z "$NODE1" ] || [ -z "$NODE2" ] && {
		echo "Please provide node-ip in $CLUSTER_CONF."
		exit 1
	}
	for ip in $NODE1 $NODE2
	do
		# NumberOfPasswordPrompts=0 makes ssh fail immediately instead
		# of prompting when key-based auth is not set up.
		if ! ssh -o NumberOfPasswordPrompts=0 -l root "$ip" "pwd" > /dev/null
		then
			echo "Please setup ssh-access with no-authorized mode."
			exit 1
		fi
	done
}
20 | ||
# Build the global devlist array of test disks, either from an explicit
# 'devlist=' line in $CLUSTER_CONF or by globbing iscsi disks that match
# ISCSI_TARGET_ID on both nodes.  Exports dev0..devN and requires at
# least 6 usable disks.
fetch_devlist()
{
	ISCSI_ID="$(grep '^ISCSI_TARGET_ID' "$CLUSTER_CONF" | cut -d'=' -f2)"
	devlist="$(grep '^devlist' "$CLUSTER_CONF" | cut -d'=' -f2)"
	if [ -n "$ISCSI_ID" ] && [ -n "$devlist" ]
	then
		# The two settings are mutually exclusive — pick one.
		echo "Config ISCSI_TARGET_ID or devlist in $CLUSTER_CONF."
		exit 1
	elif [ -n "$ISCSI_ID" ] && [ -z "$devlist" ]
	then
		for ip in $NODE1 $NODE2
		do
			if ! ssh "$ip" "ls /dev/disk/by-path/*$ISCSI_ID*" > /dev/null
			then
				echo "$ip: No disks found in '$ISCSI_ID' connection."
				exit 1
			fi
		done
		devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
	fi
	# sbd (fencing) disks cannot be used in testing — drop any disk that
	# carries an sbd header.
	for i in ${devlist[@]}
	do
		sbd -d $i dump &> /dev/null && devlist=(${devlist[@]#$i})
	done
	# Export dev0..devN for the individual tests.  Iterate over the real
	# indexes only: the original 'seq 0 ${#devlist[@]}' ran one index
	# past the end of the array and created an empty trailing devN.
	for i in "${!devlist[@]}"
	do
		eval "dev$i=${devlist[$i]}"
	done
	[ "${#devlist[@]}" -lt 6 ] && {
		echo "Cluster-md testing requires 6 disks at least."
		exit 1
	}
}
56 | ||
# Ensure the pacemaker dlm resource exists (creating primitive/group/clone
# if absent) and that dlm_controld runs on both nodes.  Any failed or
# stopped cluster resource aborts the run.
check_dlm()
{
	if ! crm configure show | grep -q dlm
	then
		crm configure primitive dlm ocf:pacemaker:controld \
			op monitor interval=60 timeout=60 \
			meta target-role=Started &> /dev/null
		crm configure group base-group dlm
		crm configure clone base-clone base-group \
			meta interleave=true
	fi
	sleep 1
	for ip in $NODE1 $NODE2
	do
		# The daemon must be up on every node before md-cluster tests.
		if ! ssh $ip "pgrep dlm_controld > /dev/null"
		then
			echo "$ip: dlm_controld daemon doesn't exist."
			exit 1
		fi
	done
	# Any "Failed"/"not running" line in the resource status is fatal.
	if crm_mon -r -n1 | grep -iq "fail\|not"
	then
		echo "Please clear cluster-resource errors."
		exit 1
	fi
}
81 | ||
# Validate the full test environment: running as root, ssh access works,
# all required commands and kernel modules exist on both nodes, the
# installed mdadm matches the source tree, no RAID is already active,
# enough disks are available and dlm is healthy.  Creates $logdir.
check_env()
{
	# The tests manipulate /dev and cluster services directly.
	user=$(id -un)
	if [ "$user" != "root" ]
	then
		echo "testing can only be done as 'root'."
		exit 1
	fi
	check_ssh
	commands=(mdadm iscsiadm bc modinfo dlm_controld
		  udevadm crm crm_mon lsblk pgrep sbd)
	# NOTE(review): $mdadm is presumably the in-tree mdadm binary path,
	# set by the caller of this file — confirm it is defined before use.
	mdadm_src_ver="$($mdadm -V 2>&1)"
	for ip in $NODE1 $NODE2
	do
		for cmd in ${commands[@]}
		do
			ssh $ip "which $cmd &> /dev/null" || {
				echo "$ip: $cmd, command not found!"
				exit 1
			}
		done
		# The mdadm installed on each node must match the source tree
		# under test, otherwise results are meaningless.
		mdadm_sbin_ver="$(ssh $ip "mdadm -V 2>&1")"
		[ "$mdadm_src_ver" != "$mdadm_sbin_ver" ] && {
			echo "$ip: please run 'make install' before testing."
			exit 1
		}
		mods=(raid1 raid10 md_mod dlm md-cluster)
		for mod in ${mods[@]}
		do
			ssh $ip "modinfo $mod > /dev/null" || {
				echo "$ip: $mod, module doesn't exist."
				exit 1
			}
		done
		# Refuse to run while any RAID device is already active.
		ssh $ip "lsblk -a | grep -iq raid" && {
			echo "$ip: Please run testing without running RAIDs environment."
			exit 1
		}
		ssh $ip "modprobe md_mod"
	done
	fetch_devlist
	check_dlm
	[ -d $logdir ] || mkdir -p $logdir
}
127 | ||
# Stop md arrays.  $1: "all" or a single node ip; $2: optional md device —
# when given, only that array is stopped, otherwise all arrays (-Ssq).
stop_md()
{
	case $1 in
	all )
		NODES=($NODE1 $NODE2)
		;;
	$NODE1 | $NODE2 )
		NODES=$1
		;;
	* )
		die "$1: unknown parameter."
		;;
	esac
	for ip in ${NODES[@]}
	do
		if [ -z "$2" ]
		then
			ssh $ip mdadm -Ssq
		else
			ssh $ip mdadm -S $2
		fi
	done
}
153 | ||
# Collect diagnostics (dmesg, /proc/mdstat, mdadm -D/-X output) from both
# nodes into $logdir, then stop all arrays.
# $1: optional tag (e.g. "fail") prepended to the per-run logfile name.
save_log()
{
	status=$1
	logfile="$status""$_basename".log

	cat $targetdir/stderr >> $targetdir/log
	cp $targetdir/log $logdir/$_basename.log

	for ip in $NODE1 $NODE2
	do
		echo "##$ip: saving dmesg." >> $logdir/$logfile
		ssh $ip "dmesg -c" >> $logdir/$logfile
		echo "##$ip: saving proc mdstat." >> $logdir/$logfile
		ssh $ip "cat /proc/mdstat" >> $logdir/$logfile
		array=($(ssh $ip "mdadm -Ds | cut -d' ' -f2"))

		if [ ! -z "$array" -a ${#array[@]} -ge 1 ]
		then
			echo "##$ip: mdadm -D ${array[@]}" >> $logdir/$logfile
			ssh $ip "mdadm -D ${array[@]}" >> $logdir/$logfile
			# Single quotes keep '/dev/' inside the remote command;
			# the original's nested double quotes broke the string.
			md_disks=($(ssh $ip "mdadm -DY ${array[@]} | grep '/dev/' | cut -d'=' -f2"))
			# Check the bitmap on the node being logged — the
			# original read the *local* /proc/mdstat here, which is
			# wrong for the remote node's arrays.
			if ssh $ip "grep -q bitmap /proc/mdstat"
			then
				echo "##$ip: mdadm -X ${md_disks[@]}" >> $logdir/$logfile
				ssh $ip "mdadm -X ${md_disks[@]}" >> $logdir/$logfile
			fi
		else
			echo "##$ip: no array assembled!" >> $logdir/$logfile
		fi
	done
	[ "$1" == "fail" ] &&
		echo "See $logdir/$_basename.log and $logdir/$logfile for details"
	stop_md all
}
190 | ||
# One-time setup before running the test suite: allow core dumps and
# validate the whole cluster environment.
do_setup()
{
	ulimit -c unlimited
	check_env
}
196 | ||
# Return both nodes to a clean state: stop every md array, clear the
# kernel log, then wipe md superblocks from the test disks.
cleanup()
{
	check_ssh
	for ip in $NODE1 $NODE2
	do
		# Stop arrays quietly and drain dmesg so the next test starts
		# with an empty kernel log.
		ssh $ip "mdadm -Ssq; dmesg -c > /dev/null"
	done
	mdadm --zero ${devlist[@]} &> /dev/null
}
206 | ||
# Assert a cluster/array condition, dying on mismatch.
# $1: "all" or a single node ip.
# $2: feature to check — spares/raid*/PENDING/recovery/resync/reshape/
#     wait/bitmap/chunk/state/nosync/readonly/dmesg.
# $3: expected value for spares/chunk/state.
check()
{
	NODES=()
	if [ "$1" == "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
	then
		NODES=$1
	else
		die "$1: unknown parameter."
	fi
	case $2 in
	spares )
		for ip in ${NODES[@]}
		do
			# Count "(S)" spare markers in the node's mdstat.
			spares=$(ssh $ip "tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)'")
			[ "$spares" -ne "$3" ] &&
				die "$ip: expected $3 spares, but found $spares"
		done
	;;
	raid* )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -sq '$2' /proc/mdstat" ||
				die "$ip: check '$2' failed."
		done
	;;
	PENDING | recovery | resync | reshape )
		for ip in ${NODES[@]}
		do
			# Reset the retry budget per node — the original shared
			# one counter, shortchanging the second node.
			cnt=5
			while ! ssh $ip "grep -sq '$2' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 0.2
					cnt=$[cnt-1]
				else
					die "$ip: no '$2' happening!"
				fi
			done
		done
	;;
	wait )
		local cnt
		for ip in ${NODES[@]}
		do
			cnt=60	# 60 * 5s = 300s timeout per node
			# Raise the sync speed cap so resync finishes quickly;
			# the node's original value is restored afterwards.
			p=$(ssh $ip "cat /proc/sys/dev/raid/speed_limit_max")
			ssh $ip "echo 200000 > /proc/sys/dev/raid/speed_limit_max"
			while ssh $ip "grep -Esq '(resync|recovery|reshape|check|repair)' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 5
					cnt=$[cnt-1]
				else
					die "$ip: Check '$2' timeout over 300 seconds."
				fi
			done
			ssh $ip "echo $p > /proc/sys/dev/raid/speed_limit_max"
		done
	;;
	bitmap )
		for ip in ${NODES[@]}
		do
			echo $ip
			ssh $ip cat /proc/mdstat
			ssh $ip "grep -sq '$2' /proc/mdstat" ||
				die "$ip: no '$2' found in /proc/mdstat."
		done
	;;
	chunk )
		for ip in ${NODES[@]}
		do
			# Parse mdstat from the node under test — the original
			# read the *local* /proc/mdstat regardless of $ip.
			chunk_size=$(ssh $ip cat /proc/mdstat |
				awk -F',' '/chunk/{print $2}' | awk -F'[a-z]' '{print $1}')
			[ "$chunk_size" -ne "$3" ] &&
				die "$ip: chunksize should be $3, but it's $chunk_size"
		done
	;;
	state )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Esq 'blocks.*\[$3\]\$' /proc/mdstat" ||
				die "$ip: no '$3' found in /proc/mdstat."
		done
	;;
	nosync )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Eq '(resync|recovery)' /proc/mdstat" &&
				die "$ip: resync or recovery is happening!"
		done
	;;
	readonly )
		for ip in ${NODES[@]}
		do
			# Single quotes fix the original's broken nested quoting.
			ssh $ip "grep -sq 'read-only' /proc/mdstat" ||
				die "$ip: check '$2' failed!"
		done
	;;
	dmesg )
		for ip in ${NODES[@]}
		do
			ssh $ip "dmesg | grep -iq 'error\|call trace\|segfault'" &&
				die "$ip: check '$2' prints errors!"
		done
	;;
	* )
		die "unknown parameter $2"
	;;
	esac
}