]>
Commit | Line | Data |
---|---|---|
beb71de0 ZL |
1 | #!/bin/bash |
2 | ||
beb71de0 ZL |
# check_ssh: load the two cluster node addresses from $CLUSTER_CONF and make
# sure each of them accepts a non-interactive root ssh login.
#
# Globals read:    CLUSTER_CONF (path of the cluster configuration file)
# Globals written: NODE1, NODE2 (text after '=' on the NODE1/NODE2 lines), ip
# Exits with status 1 when an address is missing or a login attempt fails;
# returns 0 otherwise.
check_ssh()
{
	# The path is quoted so that an empty CLUSTER_CONF makes grep fail
	# instead of silently waiting for input on stdin.
	NODE1="$(grep '^NODE1' "$CLUSTER_CONF" | cut -d'=' -f2)"
	NODE2="$(grep '^NODE2' "$CLUSTER_CONF" | cut -d'=' -f2)"
	if [ -z "$NODE1" ] || [ -z "$NODE2" ]
	then
		echo "Please provide node-ip in $CLUSTER_CONF."
		exit 1
	fi
	for ip in $NODE1 $NODE2
	do
		# NumberOfPasswordPrompts=0: ssh must fail rather than prompt
		# when key-based login is not configured.
		if ! ssh -o NumberOfPasswordPrompts=0 "$ip" -l root "pwd" > /dev/null
		then
			echo "Please setup ssh-access with no-authorized mode."
			exit 1
		fi
	done
}
20 | ||
# fetch_devlist: build the list of disks used by the cluster-md tests.
#
# The list comes from exactly one of two settings in $CLUSTER_CONF:
#   devlist=...          an explicit, whitespace separated list of devices
#   ISCSI_TARGET_ID=...  every /dev/disk/by-path entry matching the id
# Devices on which 'sbd -d <dev> dump' succeeds are dropped from the list.
#
# Globals read:    CLUSTER_CONF, NODE1, NODE2
# Globals written: ISCSI_ID, devlist (array), dev0 .. dev<N-1>, ip
# Exits with status 1 when both settings are given, when a node shows no
# disk for the iSCSI id, or when fewer than 6 disks remain.
fetch_devlist()
{
	local dev idx
	local usable=()

	ISCSI_ID="$(grep '^ISCSI_TARGET_ID' "$CLUSTER_CONF" | cut -d'=' -f2)"
	devlist="$(grep '^devlist' "$CLUSTER_CONF" | cut -d'=' -f2)"
	if [ -n "$ISCSI_ID" ] && [ -n "$devlist" ]
	then
		echo "Config ISCSI_TARGET_ID or devlist in $CLUSTER_CONF."
		exit 1
	elif [ -n "$ISCSI_ID" ]
	then
		for ip in $NODE1 $NODE2
		do
			if ! ssh "$ip" "ls /dev/disk/by-path/*$ISCSI_ID*" > /dev/null
			then
				echo "$ip: No disks found in '$ISCSI_ID' connection."
				exit 1
			fi
		done
		devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
	fi

	# Turn devlist into an array; the unquoted expansion splits a plain
	# string taken from the config file on whitespace.
	devlist=(${devlist[@]})

	# An sbd disk cannot be used for testing. Keep only whole entries for
	# which the sbd dump fails; stripping the name as a prefix would also
	# corrupt entries that merely start with the same text.
	for dev in "${devlist[@]}"
	do
		sbd -d "$dev" dump &> /dev/null || usable+=("$dev")
	done
	devlist=("${usable[@]}")

	# Export dev0, dev1, ... for existing indexes only (no trailing empty
	# variable past the end of the array).
	for idx in "${!devlist[@]}"
	do
		printf -v "dev$idx" '%s' "${devlist[$idx]}"
	done

	if [ "${#devlist[@]}" -lt 6 ]
	then
		echo "Cluster-md testing requires 6 disks at least."
		exit 1
	fi
}
59 | ||
# check_dlm: make sure the dlm resource is configured and running.
#
# When the output of 'crm configure show' does not mention dlm, the dlm
# primitive, its base-group and the base-clone are created first. Afterwards
# dlm_controld must be running on NODE1 and NODE2, and the crm_mon output
# must not contain "fail" or "not" (case-insensitive); otherwise the
# script exits with status 1.
check_dlm()
{
	crm configure show | grep -q dlm || {
		crm configure primitive dlm ocf:pacemaker:controld \
			op monitor interval=60 timeout=60 \
			meta target-role=Started &> /dev/null
		crm configure group base-group dlm
		crm configure clone base-clone base-group \
			meta interleave=true
	}
	sleep 1
	for ip in $NODE1 $NODE2
	do
		if ! ssh $ip "pgrep dlm_controld > /dev/null"
		then
			echo "$ip: dlm_controld daemon doesn't exist."
			exit 1
		fi
	done
	# Any failed or not-running resource reported by crm_mon aborts the run.
	crm_mon -r -n1 | grep -iq "fail\|not" && {
		echo "Please clear cluster-resource errors."
		exit 1
	}
}
84 | ||
# check_env: verify that the local host and both cluster nodes are ready for
# cluster-md testing, then collect the test disks and check dlm.
#
# Checks, in order:
#   - the script runs as root
#   - $mdadm is an executable file
#   - ssh access to both nodes (check_ssh)
#   - every required command and kernel module is available on each node
#   - no RAID device shows up in 'lsblk -a' on any node
# Finally calls fetch_devlist and check_dlm and creates $logdir if missing.
#
# Globals read:    mdadm, logdir, NODE1, NODE2
# Globals written: user, commands, mods, ip, cmd, mod
# Exits with status 1 as soon as one check fails.
check_env()
{
	user=$(id -un)
	if [ "X$user" != "Xroot" ]
	then
		echo "testing can only be done as 'root'."
		exit 1
	fi
	# Quoted on purpose: with an empty $mdadm an unquoted test degenerates
	# to '[ ! -x ]', which is false, and the check would silently pass.
	if [ ! -x "$mdadm" ]
	then
		echo "test: please run make everything before perform testing."
		exit 1
	fi
	check_ssh
	commands=(mdadm iscsiadm bc modinfo dlm_controld
		udevadm crm crm_mon lsblk pgrep sbd)
	mods=(raid1 raid10 md_mod dlm md-cluster)
	for ip in $NODE1 $NODE2
	do
		for cmd in "${commands[@]}"
		do
			if ! ssh "$ip" "which $cmd &> /dev/null"
			then
				echo "$ip: $cmd, command not found!"
				exit 1
			fi
		done
		for mod in "${mods[@]}"
		do
			if ! ssh "$ip" "modinfo $mod > /dev/null"
			then
				echo "$ip: $mod, module doesn't exist."
				exit 1
			fi
		done
		# Refuse to run while any RAID device is present on the node.
		if ssh "$ip" "lsblk -a | grep -iq raid"
		then
			echo "$ip: Please run testing without running RAIDs environment."
			exit 1
		fi
		ssh "$ip" "modprobe md_mod"
	done
	fetch_devlist
	check_dlm
	[ -d "$logdir" ] || mkdir -p "$logdir"
}
127 | ||
# stop_md: stop md arrays on the cluster nodes.
#   $1  node selector: "all", or the value of $NODE1 or $NODE2
#   $2  optional array device; when omitted every array on the selected
#       node(s) is stopped quietly (mdadm -Ssq)
#
# Globals read:    NODE1, NODE2
# Globals written: NODES (array of selected nodes), ip
# Calls die (defined outside this function) for an unknown selector.
stop_md()
{
	if [ "$1" = "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" = "$NODE1" ] || [ "$1" = "$NODE2" ]
	then
		# Assign a fresh one-element array. A plain 'NODES=$1' would only
		# overwrite element 0 and keep a second node left over from an
		# earlier "all" call.
		NODES=("$1")
	else
		die "$1: unknown parameter."
	fi
	for ip in "${NODES[@]}"
	do
		if [ -z "$2" ]
		then
			ssh "$ip" mdadm -Ssq
		else
			ssh "$ip" mdadm -S "$2"
		fi
	done
}
153 | ||
# save_log: collect diagnostics from both nodes, then stop all arrays.
#   $1  optional status word explaining why the log is saved; it is used as
#       a prefix of the per-node log file name ("fail" additionally prints
#       a hint with the log locations)
#
# Files written:
#   $logdir/$_basename.log      copy of $targetdir/log, after the content of
#                               $targetdir/stderr has been appended to it
#   $logdir/$1$_basename.log    dmesg, /proc/mdstat and mdadm details per node
#
# Globals read:    targetdir, logdir, _basename, NODE1, NODE2
# Globals written: status, logfile, array, md_disks, ip
save_log()
{
	status=$1
	logfile="$status""$_basename".log

	cat "$targetdir/stderr" >> "$targetdir/log"
	cp "$targetdir/log" "$logdir/$_basename.log"

	for ip in $NODE1 $NODE2
	do
		{
			echo "##$ip: saving dmesg."
			# 'dmesg -c' also clears the kernel ring buffer on the node.
			ssh "$ip" "dmesg -c"
			echo "##$ip: saving proc mdstat."
			ssh "$ip" "cat /proc/mdstat"
		} >> "$logdir/$logfile"
		array=($(ssh "$ip" "mdadm -Ds | cut -d' ' -f2"))

		if [ "${#array[@]}" -ge 1 ]
		then
			echo "##$ip: mdadm -D ${array[*]}" >> "$logdir/$logfile"
			ssh "$ip" "mdadm -D ${array[*]}" >> "$logdir/$logfile"
			md_disks=($(ssh "$ip" "mdadm -DY ${array[*]} | grep '/dev/' | cut -d'=' -f2"))
			# Look at the mdstat of the node being logged, not at the
			# local one, before dumping bitmap information for it.
			if ssh "$ip" "grep -q bitmap /proc/mdstat"
			then
				{
					echo "##$ip: mdadm -X ${md_disks[*]}"
					ssh "$ip" "mdadm -X ${md_disks[*]}"
					echo "##$ip: mdadm -E ${md_disks[*]}"
					ssh "$ip" "mdadm -E ${md_disks[*]}"
				} >> "$logdir/$logfile"
			fi
		else
			echo "##$ip: no array assembled!" >> "$logdir/$logfile"
		fi
	done
	if [ "$1" = "fail" ]
	then
		echo "See $logdir/$_basename.log and $logdir/$logfile for details"
	fi
	stop_md all
}
192 | ||
# do_setup: prepare a test run.
# Runs the environment checks and then removes the core-file size limit
# for the current shell.
do_setup()
{
	# check_env terminates the script itself when a check fails
	check_env

	# no upper bound on the size of core files
	ulimit -c unlimited
}
198 | ||
# do_clean: reset both nodes and the test disks.
# On NODE1 and NODE2 every md array is stopped and the kernel log is cleared
# (the dmesg output is discarded); afterwards 'mdadm --zero' is run on all
# devices of devlist with its output suppressed.
do_clean()
{
	for ip in $NODE1 $NODE2
	do
		ssh $ip "mdadm -Ssq; dmesg -c > /dev/null"
	done

	# output and errors of the zeroing step are intentionally dropped
	mdadm --zero ${devlist[@]} > /dev/null 2>&1
}
207 | ||
064bd3f5 ZL |
# cleanup: verify ssh access to the nodes (which also loads NODE1/NODE2)
# and then run do_clean.
cleanup()
{
	# do_clean iterates over NODE1/NODE2, which check_ssh sets
	check_ssh

	do_clean
}
213 | ||
beb71de0 ZL |
# check: verify a property of the md arrays on one or both cluster nodes.
#   $1  node selector: "all", or the value of $NODE1 or $NODE2
#   $2  feature to check (see below)
#   $3  optional expected value, used by spares, chunk and state
#
# Features:
#   spares    number of "(S)" members in /proc/mdstat equals $3
#   raid*     /proc/mdstat contains the given level name
#   PENDING | recovery | resync | reshape
#             the keyword shows up in /proc/mdstat; retried every 0.2s with
#             a budget of 5 retries shared by all selected nodes
#   wait      wait until no resync/recovery/reshape/check/repair is running;
#             polled every 5s with a budget of 60 retries shared by all
#             selected nodes; speed_limit_max is set to 200000 while
#             waiting and restored afterwards
#   bitmap    /proc/mdstat mentions "bitmap"
#   nobitmap  /proc/mdstat does not mention "bitmap"
#   chunk     chunk size parsed from the node's /proc/mdstat equals $3
#   state     a "blocks" line of /proc/mdstat ends with "[$3]"
#   nosync    neither resync nor recovery is running
#   readonly  /proc/mdstat contains "read-only"
#   dmesg     dmesg shows no "error", "call trace" or "segfault"
#
# Globals read:    NODE1, NODE2
# Globals written: NODES, ip, spares, p, chunk_size
# Calls die (defined outside this function) when a check fails or when a
# parameter is unknown; returns 0 when the check passes.
check()
{
	local cnt

	NODES=()
	if [ "$1" = "all" ]
	then
		NODES=($NODE1 $NODE2)
	elif [ "$1" = "$NODE1" ] || [ "$1" = "$NODE2" ]
	then
		NODES=("$1")
	else
		die "$1: unknown parameter."
	fi
	case $2 in
		spares )
			for ip in "${NODES[@]}"
			do
				spares=$(ssh "$ip" "tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)'")
				if [ "$spares" -ne "$3" ]
				then
					die "$ip: expected $3 spares, but found $spares"
				fi
			done
		;;
		raid* )
			for ip in "${NODES[@]}"
			do
				ssh "$ip" "grep -sq '$2' /proc/mdstat" ||
					die "$ip: check '$2' failed."
			done
		;;
		PENDING | recovery | resync | reshape )
			cnt=5
			for ip in "${NODES[@]}"
			do
				while ! ssh "$ip" "grep -sq '$2' /proc/mdstat"
				do
					if [ "$cnt" -gt 0 ]
					then
						sleep 0.2
						cnt=$((cnt - 1))
					else
						die "$ip: no '$2' happening!"
					fi
				done
			done
		;;
		wait )
			cnt=60
			for ip in "${NODES[@]}"
			do
				# Remember the node's limit, raise it while waiting and
				# put the old value back once the node is idle.
				p=$(ssh "$ip" "cat /proc/sys/dev/raid/speed_limit_max")
				ssh "$ip" "echo 200000 > /proc/sys/dev/raid/speed_limit_max"
				while ssh "$ip" "grep -Esq '(resync|recovery|reshape|check|repair)' /proc/mdstat"
				do
					if [ "$cnt" -gt 0 ]
					then
						sleep 5
						cnt=$((cnt - 1))
					else
						die "$ip: Check '$2' timeout over 300 seconds."
					fi
				done
				ssh "$ip" "echo $p > /proc/sys/dev/raid/speed_limit_max"
			done
		;;
		bitmap )
			for ip in "${NODES[@]}"
			do
				ssh "$ip" "grep -sq '$2' /proc/mdstat" ||
					die "$ip: no '$2' found in /proc/mdstat."
			done
		;;
		nobitmap )
			for ip in "${NODES[@]}"
			do
				if ssh "$ip" "grep -sq 'bitmap' /proc/mdstat"
				then
					die "$ip: 'bitmap' found in /proc/mdstat."
				fi
			done
		;;
		chunk )
			for ip in "${NODES[@]}"
			do
				# Fetch mdstat from the node under test and parse it
				# locally: second comma-separated field of the line
				# containing "chunk", cut at the first lowercase letter.
				chunk_size=$(ssh "$ip" "cat /proc/mdstat" |
					awk -F',' '/chunk/{print $2}' |
					awk -F'[a-z]' '{print $1}')
				if [ "$chunk_size" -ne "$3" ]
				then
					die "$ip: chunksize should be $3, but it's $chunk_size"
				fi
			done
		;;
		state )
			for ip in "${NODES[@]}"
			do
				ssh "$ip" "grep -Esq 'blocks.*\[$3\]\$' /proc/mdstat" ||
					die "$ip: no '$3' found in /proc/mdstat."
			done
		;;
		nosync )
			for ip in "${NODES[@]}"
			do
				if ssh "$ip" "grep -Eq '(resync|recovery)' /proc/mdstat"
				then
					die "$ip: resync or recovery is happening!"
				fi
			done
		;;
		readonly )
			for ip in "${NODES[@]}"
			do
				ssh "$ip" "grep -sq 'read-only' /proc/mdstat" ||
					die "$ip: check '$2' failed!"
			done
		;;
		dmesg )
			for ip in "${NODES[@]}"
			do
				if ssh "$ip" "dmesg | grep -iq 'error\|call trace\|segfault'"
				then
					die "$ip: check '$2' prints errors!"
				fi
			done
		;;
		* )
			die "unknown parameter $2"
		;;
	esac
}