]>
Commit | Line | Data |
---|---|---|
beb71de0 ZL |
#!/bin/bash
# Shared helper library for the cluster-md test suite.  Node addresses,
# iSCSI target id and/or the raw disk list are read from the cluster_conf
# file that sits next to the tests.
CLUSTER_CONF=$PWD/cluster_conf
check_ssh()
{
	# Read the two node addresses from $CLUSTER_CONF and verify each one
	# accepts passwordless (key-based) root ssh logins; abort otherwise.
	NODE1="$(grep '^NODE1' $CLUSTER_CONF | cut -d'=' -f2)"
	NODE2="$(grep '^NODE2' $CLUSTER_CONF | cut -d'=' -f2)"
	# POSIX marks '[ a -o b ]' obsolescent; use two tests joined by ||.
	if [ -z "$NODE1" ] || [ -z "$NODE2" ]
	then
		echo "Please provide node-ip in $CLUSTER_CONF."
		exit 1
	fi
	for ip in $NODE1 $NODE2
	do
		# NumberOfPasswordPrompts=0 makes ssh fail fast instead of
		# hanging at a password prompt when key auth is missing.
		ssh -o NumberOfPasswordPrompts=0 $ip -l root "pwd" > /dev/null || {
			echo "Please setup ssh-access with no-authorized mode."
			exit 1
		}
	done
}
22 | ||
fetch_devlist()
{
	# Build the global device list (devlist / dev0..devN) used by the
	# tests, either from an explicit 'devlist=' line in $CLUSTER_CONF or
	# by globbing iSCSI disks that match 'ISCSI_TARGET_ID='.
	ISCSI_ID="$(grep '^ISCSI_TARGET_ID' $CLUSTER_CONF | cut -d'=' -f2)"
	devlist="$(grep '^devlist' $CLUSTER_CONF | cut -d'=' -f2)"
	if [ -n "$ISCSI_ID" ] && [ -n "$devlist" ]
	then
		# The two settings are mutually exclusive.
		echo "Config ISCSI_TARGET_ID or devlist in $CLUSTER_CONF."
		exit 1
	elif [ -n "$ISCSI_ID" ] && [ -z "$devlist" ]
	then
		for ip in $NODE1 $NODE2
		do
			ssh $ip "ls /dev/disk/by-path/*$ISCSI_ID*" > /dev/null || {
				echo "$ip: No disks found in '$ISCSI_ID' connection."
				exit 1
			}
		done
		# NOTE(review): this glob expands on the local host; it assumes
		# the test driver also sees the iSCSI disks — confirm.
		devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
	fi
	# Normalize to an array: a config-supplied devlist is a single
	# space-separated string, which made ${#devlist[@]} report 1 no
	# matter how many disks it named.
	devlist=(${devlist[@]})
	# sbd disk cannot use in testing
	for i in ${devlist[@]}
	do
		# If 'sbd dump' succeeds, $i is an sbd disk: drop it from the
		# list (empty words vanish on the unquoted re-expansion).
		sbd -d $i dump &> /dev/null && devlist=(${devlist[@]#$i})
	done
	[ "${#devlist[@]}" -lt 6 ] && {
		echo "Cluster-md testing requires 6 disks at least."
		exit 1
	}
	# Export dev0..devN.  The upper bound is the last valid index; the
	# original 'seq 0 ${#devlist[@]}' overran the array by one.
	for i in $(seq 0 $((${#devlist[@]} - 1)))
	do
		eval "dev$i=${devlist[$i]}"
	done
}
58 | ||
check_dlm()
{
	# Ensure the pacemaker dlm resource exists (creating the primitive,
	# group and clone if missing) and that dlm_controld is running on
	# every node, with no failed resources left in the cluster.
	if ! crm configure show | grep -q dlm
	then
		crm configure primitive dlm ocf:pacemaker:controld \
			op monitor interval=60 timeout=60 \
			meta target-role=Started &> /dev/null
		crm configure group base-group dlm
		crm configure clone base-clone base-group \
			meta interleave=true
	fi
	sleep 1
	for ip in $NODE1 $NODE2
	do
		if ! ssh $ip "pgrep dlm_controld > /dev/null"
		then
			echo "$ip: dlm_controld daemon doesn't exist."
			exit 1
		fi
	done
	# Lingering resource failures would skew every test result.
	if crm_mon -r -n1 | grep -iq "fail\|not"
	then
		echo "Please clear cluster-resource errors."
		exit 1
	fi
}
83 | ||
check_env()
{
	# Sanity-check the whole test environment before any test runs:
	# root privileges, ssh reachability, required tools and kernel
	# modules on both nodes, matching mdadm versions, and a RAID-free
	# starting state.  Exits non-zero on the first problem found.
	user=$(id -un)
	if [ "X$user" != "Xroot" ]
	then
		echo "testing can only be done as 'root'."
		exit 1
	fi
	check_ssh
	commands=(mdadm iscsiadm bc modinfo dlm_controld
		  udevadm crm crm_mon lsblk pgrep sbd)
	# NOTE(review): $mdadm is expected to be set by the calling test
	# driver (path to the freshly built binary) — confirm.
	mdadm_src_ver="$($mdadm -V 2>&1)"
	for ip in $NODE1 $NODE2
	do
		for cmd in ${commands[@]}
		do
			if ! ssh $ip "which $cmd &> /dev/null"
			then
				echo "$ip: $cmd, command not found!"
				exit 1
			fi
		done
		# The mdadm installed on each node must match the local
		# source build, otherwise we would test the wrong binary.
		mdadm_sbin_ver="$(ssh $ip "mdadm -V 2>&1")"
		if [ "$mdadm_src_ver" != "$mdadm_sbin_ver" ]
		then
			echo "$ip: please run 'make install' before testing."
			exit 1
		fi
		mods=(raid1 raid10 md_mod dlm md-cluster)
		for mod in ${mods[@]}
		do
			if ! ssh $ip "modinfo $mod > /dev/null"
			then
				echo "$ip: $mod, module doesn't exist."
				exit 1
			fi
		done
		# Refuse to run while any RAID device is already active.
		if ssh $ip "lsblk -a | grep -iq raid"
		then
			echo "$ip: Please run testing without running RAIDs environment."
			exit 1
		fi
		ssh $ip "modprobe md_mod"
	done
	fetch_devlist
	check_dlm
	# NOTE(review): $logdir is defined by the test driver — confirm.
	[ -d $logdir ] || mkdir -p $logdir
}
129 | ||
# $1/node, $2/optional
stop_md()
{
	# Stop MD arrays on "all" nodes or on one named node ($1).  With no
	# second argument every array is stopped (-Ss); otherwise only the
	# array named by $2 is stopped.
	case "$1" in
	all )
		NODES=($NODE1 $NODE2)
		;;
	"$NODE1" | "$NODE2" )
		NODES=$1
		;;
	* )
		die "$1: unknown parameter."
		;;
	esac
	for ip in ${NODES[@]}
	do
		if [ -z "$2" ]
		then
			ssh $ip mdadm -Ssq
		else
			ssh $ip mdadm -S $2
		fi
	done
}
155 | ||
# $1/optional, it shows why to save log
save_log()
{
	# Gather dmesg, /proc/mdstat and mdadm metadata from both nodes into
	# $logdir, then stop all arrays.  $1 (e.g. "fail") becomes a prefix
	# of the per-run log file name.
	status=$1
	logfile="$status""$_basename".log

	cat $targetdir/stderr >> $targetdir/log
	cp $targetdir/log $logdir/$_basename.log

	for ip in $NODE1 $NODE2
	do
		echo "##$ip: saving dmesg." >> $logdir/$logfile
		ssh $ip "dmesg -c" >> $logdir/$logfile
		echo "##$ip: saving proc mdstat." >> $logdir/$logfile
		ssh $ip "cat /proc/mdstat" >> $logdir/$logfile
		array=($(ssh $ip "mdadm -Ds | cut -d' ' -f2"))

		# Guard clause: nothing more to collect on this node.
		if [ -z "$array" ] || [ ${#array[@]} -lt 1 ]
		then
			echo "##$ip: no array assembled!" >> $logdir/$logfile
			continue
		fi
		echo "##$ip: mdadm -D ${array[@]}" >> $logdir/$logfile
		ssh $ip "mdadm -D ${array[@]}" >> $logdir/$logfile
		md_disks=($(ssh $ip "mdadm -DY ${array[@]} | grep "/dev/" | cut -d'=' -f2"))
		# NOTE(review): bitmap presence is checked in the *local*
		# /proc/mdstat, not on $ip — confirm that is intended.
		if grep -q "bitmap" /proc/mdstat
		then
			echo "##$ip: mdadm -X ${md_disks[@]}" >> $logdir/$logfile
			ssh $ip "mdadm -X ${md_disks[@]}" >> $logdir/$logfile
		fi
	done
	if [ "$1" == "fail" ]
	then
		echo "See $logdir/$_basename.log and $logdir/$logfile for details"
	fi
	stop_md all
}
192 | ||
do_setup()
{
	# Run the full environment validation, then allow unlimited core
	# dumps so crashes during testing can be debugged.
	check_env
	ulimit -c unlimited
}
198 | ||
cleanup()
{
	# Stop every array and clear the kernel ring buffer on both nodes,
	# then wipe MD metadata from all shared test disks.
	check_ssh
	for ip in $NODE1 $NODE2
	do
		ssh $ip "mdadm -Ssq; dmesg -c > /dev/null"
	done
	mdadm --zero ${devlist[@]} &> /dev/null
}
208 | ||
# check: $1/cluster_node $2/feature $3/optional
check()
{
	# Assert a condition on one node or on all nodes, dying on failure.
	# $1: "all", $NODE1 or $NODE2 — where to check.
	# $2: what to check: spares/raid*/PENDING/recovery/resync/reshape/
	#     wait/bitmap/chunk/state/nosync/readonly/dmesg.
	# $3: expected value for checks that take one (spares, chunk, state).
	NODES=()
	if [ "$1" == "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
	then
		NODES=$1
	else
		die "$1: unknown parameter."
	fi
	case $2 in
	spares )
		# Count '(S)' markers in mdstat; must equal $3 exactly.
		for ip in ${NODES[@]}
		do
			spares=$(ssh $ip "tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)'")
			[ "$spares" -ne "$3" ] &&
				die "$ip: expected $3 spares, but found $spares"
		done
	;;
	raid* )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -sq "$2" /proc/mdstat" ||
				die "$ip: check '$2' failed."
		done
	;;
	PENDING | recovery | resync | reshape )
		# Poll up to ~1s (5 x 0.2s) for the sync action to appear.
		cnt=5
		for ip in ${NODES[@]}
		do
			while ! ssh $ip "grep -sq '$2' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 0.2
					# $((...)) replaces the deprecated $[...] form.
					cnt=$((cnt - 1))
				else
					die "$ip: no '$2' happening!"
				fi
			done
		done
	;;
	wait )
		# Wait (up to ~300s) for any sync action to finish, with the
		# resync speed limit temporarily raised, then restored.
		local cnt=60
		for ip in ${NODES[@]}
		do
			p=$(ssh $ip "cat /proc/sys/dev/raid/speed_limit_max")
			ssh $ip "echo 200000 > /proc/sys/dev/raid/speed_limit_max"
			while ssh $ip "grep -Esq '(resync|recovery|reshape|check|repair)' /proc/mdstat"
			do
				if [ "$cnt" -gt '0' ]
				then
					sleep 5
					cnt=$((cnt - 1))
				else
					die "$ip: Check '$2' timeout over 300 seconds."
				fi
			done
			ssh $ip "echo $p > /proc/sys/dev/raid/speed_limit_max"
		done
	;;
	bitmap )
		for ip in ${NODES[@]}
		do
			echo $ip
			ssh $ip cat /proc/mdstat
			ssh $ip "grep -sq '$2' /proc/mdstat" ||
				die "$ip: no '$2' found in /proc/mdstat."
		done
	;;
	chunk )
		for ip in ${NODES[@]}
		do
			# Fix: read mdstat on the node being checked via ssh —
			# the original parsed the *local* /proc/mdstat and never
			# used the loop variable.
			chunk_size=$(ssh $ip "awk -F',' '/chunk/{print \$2}' /proc/mdstat | awk -F'[a-z]' '{print \$1}'")
			[ "$chunk_size" -ne "$3" ] &&
				die "$ip: chunksize should be $3, but it's $chunk_size"
		done
	;;
	state )
		# $3 is the expected device-state field, e.g. 'UU' or 'U_'.
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Esq 'blocks.*\[$3\]\$' /proc/mdstat" ||
				die "$ip: no '$3' found in /proc/mdstat."
		done
	;;
	nosync )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -Eq '(resync|recovery)' /proc/mdstat" &&
				die "$ip: resync or recovery is happening!"
		done
	;;
	readonly )
		for ip in ${NODES[@]}
		do
			ssh $ip "grep -sq "read-only" /proc/mdstat" ||
				die "$ip: check '$2' failed!"
		done
	;;
	dmesg )
		for ip in ${NODES[@]}
		do
			ssh $ip "dmesg | grep -iq 'error\|call trace\|segfault'" &&
				die "$ip: check '$2' prints errors!"
		done
	;;
	* )
		die "unknown parameter $2"
	;;
	esac
}