]> git.ipfire.org Git - thirdparty/hostap.git/blob - tests/hwsim/vm/parallel-vm.py
3dd09584f8bcdb91756fbadf1eb542a3e81d329d
[thirdparty/hostap.git] / tests / hwsim / vm / parallel-vm.py
1 #!/usr/bin/env python3
2 #
3 # Parallel VM test case executor
4 # Copyright (c) 2014-2019, Jouni Malinen <j@w1.fi>
5 #
6 # This software may be distributed under the terms of the BSD license.
7 # See README for more details.
8
9 from __future__ import print_function
10 import curses
11 import fcntl
12 import logging
13 import multiprocessing
14 import os
15 import selectors
16 import subprocess
17 import sys
18 import time
19 import errno
20
# Root logger; handlers/level are configured in main().
logger = logging.getLogger()

# Test cases that take significantly longer time to execute than average.
# main() moves these to the front of the queue (unless --shuffle-tests is
# given) so a long case does not end up running alone on one VM at the
# tail of the run, and --short drops them entirely.
long_tests = ["ap_roam_open",
              "wpas_mesh_password_mismatch_retry",
              "wpas_mesh_password_mismatch",
              "hostapd_oom_wpa2_psk_connect",
              "ap_hs20_fetch_osu_stop",
              "ap_roam_wpa2_psk",
              "ibss_wpa_none_ccmp",
              "nfc_wps_er_handover_pk_hash_mismatch_sta",
              "go_neg_peers_force_diff_freq",
              "p2p_cli_invite",
              "sta_ap_scan_2b",
              "ap_pmf_sta_unprot_deauth_burst",
              "ap_bss_add_remove_during_ht_scan",
              "wext_scan_hidden",
              "autoscan_exponential",
              "nfc_p2p_client",
              "wnm_bss_keep_alive",
              "ap_inactivity_disconnect",
              "scan_bss_expiration_age",
              "autoscan_periodic",
              "discovery_group_client",
              "concurrent_p2pcli",
              "ap_bss_add_remove",
              "wpas_ap_wps",
              "wext_pmksa_cache",
              "ibss_wpa_none",
              "ap_ht_40mhz_intolerant_ap",
              "ibss_rsn",
              "discovery_pd_retries",
              "ap_wps_setup_locked_timeout",
              "ap_vht160",
              'he160',
              'he160b',
              "dfs_radar",
              "dfs",
              "dfs_ht40_minus",
              "dfs_etsi",
              "dfs_radar_vht80_downgrade",
              "ap_acs_dfs",
              "grpform_cred_ready_timeout",
              "hostapd_oom_wpa2_eap_connect",
              "wpas_ap_dfs",
              "autogo_many",
              "hostapd_oom_wpa2_eap",
              "ibss_open",
              "proxyarp_open_ebtables",
              "proxyarp_open_ebtables_ipv6",
              "radius_failover",
              "obss_scan_40_intolerant",
              "dbus_connect_oom",
              "proxyarp_open",
              "proxyarp_open_ipv6",
              "ap_wps_iteration",
              "ap_wps_iteration_error",
              "ap_wps_pbc_timeout",
              "ap_wps_http_timeout",
              "p2p_go_move_reg_change",
              "p2p_go_move_active",
              "p2p_go_move_scm",
              "p2p_go_move_scm_peer_supports",
              "p2p_go_move_scm_peer_does_not_support",
              "p2p_go_move_scm_multi"]
86
def get_failed(vm):
    """Collect the names of all failed test cases across the VMs.

    vm: dict of per-VM state dicts, each holding a 'failed' list.
    Returns a flat list of failed test case names, in VM order.

    Iterates the vm table itself rather than the global num_servers
    counter (the original indexed range(num_servers)); vm holds exactly
    one entry per server in insertion order, so the result is identical
    while the function no longer depends on a module global.
    """
    failed = []
    for data in vm.values():
        failed += data['failed']
    return failed
92
def vm_read_stdout(vm, test_queue):
    """Drain and parse pending stdout lines from one VM.

    Updates the global pass/fail/skip counters and the per-VM state
    (failed list, skip reasons, fail sequence) based on the status
    lines the VM's run-tests process prints (READY/PASS/FAIL/...).
    Returns True when the VM is ready for its next command (a test
    finished or the VM reported READY).
    """
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures
    global first_run_failures

    ready = False
    try:
        out = vm['proc'].stdout.read()
        if out == None:
            return False
        out = out.decode()
    except IOError as e:
        # Non-blocking fd with nothing to read yet; any other error is fatal.
        if e.errno == errno.EAGAIN:
            return False
        raise
    logger.debug("VM[%d] stdout.read[%s]" % (vm['idx'], out.rstrip()))
    # A read may end mid-line; keep the unterminated tail in vm['pending']
    # and only process complete (newline-terminated) lines here.
    pending = vm['pending'] + out
    lines = []
    while True:
        pos = pending.find('\n')
        if pos < 0:
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (vm['idx'], line))
        if line.startswith("READY"):
            # VM finished booting and is waiting for the first test case.
            vm['starting'] = False
            vm['started'] = True
            ready = True
        elif line.startswith("PASS"):
            ready = True
            total_passed += 1
        elif line.startswith("FAIL"):
            ready = True
            total_failed += 1
            # Expected format: "FAIL <name> ..."
            vals = line.split(' ')
            if len(vals) < 2:
                logger.info("VM[%d] incomplete FAIL line: %s" % (vm['idx'],
                                                                 line))
                name = line
            else:
                name = vals[1]
            logger.debug("VM[%d] test case failed: %s" % (vm['idx'], name))
            vm['failed'].append(name)
            if name != vm['current_name']:
                logger.info("VM[%d] test result mismatch: %s (expected %s)" % (vm['idx'], name, vm['current_name']))
            else:
                count = vm['current_count']
                # count == 0 means this was the first attempt; record it and,
                # if retries are enabled, requeue the case once (count < 1).
                if count == 0:
                    first_run_failures.append(name)
                if rerun_failures and count < 1:
                    logger.debug("Requeue test case %s" % name)
                    test_queue.append((name, vm['current_count'] + 1))
        elif line.startswith("NOT-FOUND"):
            ready = True
            total_failed += 1
            logger.info("VM[%d] test case not found" % vm['idx'])
        elif line.startswith("SKIP"):
            ready = True
            total_skipped += 1
        elif line.startswith("REASON"):
            # "REASON <text>" -- strip the 7-char prefix, keep the text.
            vm['skip_reason'].append(line[7:])
        elif line.startswith("START"):
            total_started += 1
            # Record the executed-test sequence only until the first failure
            # so main() can print a minimal reproduction command.
            if len(vm['failed']) == 0:
                vals = line.split(' ')
                if len(vals) >= 2:
                    vm['fail_seq'].append(vals[1])
        vm['out'] += line + '\n'
        lines.append(line)
    vm['pending'] = pending
    return ready
165
def start_vm(vm, sel):
    """Launch the VM subprocess and watch its output streams via sel.

    Marks the VM as starting, spawns vm['cmd'] with piped stdio,
    switches stdout/stderr to non-blocking mode and registers both
    with the selector (the VM state dict is attached as event data).
    """
    logger.info("VM[%d] starting up" % (vm['idx'] + 1))
    vm['starting'] = True
    proc = subprocess.Popen(vm['cmd'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    vm['proc'] = proc
    # The command is consumed; clearing it marks the VM as launched.
    vm['cmd'] = None
    for stream in (proc.stdout, proc.stderr):
        fd = stream.fileno()
        flags = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        sel.register(stream, selectors.EVENT_READ, vm)
179
def num_vm_starting():
    """Return how many VMs are currently in their boot ('starting') phase."""
    return sum(1 for i in range(num_servers) if vm[i]['starting'])
186
def vm_read_stderr(vm):
    """Drain any pending stderr output from the VM into vm['err']."""
    try:
        err = vm['proc'].stderr.read()
    except IOError as e:
        # EAGAIN: non-blocking fd with nothing available right now.
        if e.errno != errno.EAGAIN:
            raise
        return
    if err is None:
        return
    text = err.decode()
    if text:
        vm['err'] += text
        logger.info("VM[%d] stderr.read[%s]" % (vm['idx'], text))
198
def vm_next_step(_vm, scr, test_queue):
    """Dispatch the next queued test case to the VM, or ask it to shut down.

    Pops (name, count) from the head of test_queue and writes the name
    to the VM's stdin; an empty line tells the VM to exit when the
    queue is exhausted. Updates the VM's status line on screen.
    """
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    if test_queue:
        name, count = test_queue.pop(0)
        _vm['current_name'] = name
        _vm['current_count'] = count
        _vm['proc'].stdin.write(name.encode() + b'\n')
        _vm['proc'].stdin.flush()
        scr.addstr(name)
        logger.debug("VM[%d] start test %s" % (_vm['idx'], name))
        return
    # No work left: an empty command line makes the VM terminate.
    _vm['proc'].stdin.write(b'\n')
    _vm['proc'].stdin.flush()
    scr.addstr("shutting down")
    logger.info("VM[%d] shutting down" % _vm['idx'])
215
def check_vm_start(scr, sel, test_queue):
    """Start additional VMs when there is queued work for them.

    Starts at most one VM per call and throttles the number of VMs
    booting concurrently to half the CPU count (full count on small
    machines). Returns (running, updated): running is True while any
    VM process is alive (or was just started); updated is True when
    the screen needs a refresh.
    """
    # Loop-invariant throttle; the original recomputed this on every
    # iteration. True division is kept deliberately so odd CPU counts
    # round the same way as before (e.g. 5 CPUs -> limit 2.5).
    max_start = multiprocessing.cpu_count()
    if max_start > 4:
        max_start /= 2

    running = False
    for i in range(num_servers):
        if vm[i]['proc']:
            running = True
            continue

        # Either not yet started or already stopped VM
        num_starting = num_vm_starting()
        if vm[i]['cmd'] and len(test_queue) > num_starting and \
           num_starting < max_start:
            scr.move(i + 1, 10)
            scr.clrtoeol()
            scr.addstr(i + 1, 10, "starting VM")
            start_vm(vm[i], sel)
            return True, True

    return running, False
237
def vm_terminated(_vm, scr, sel, test_queue):
    """Handle a VM process that has exited.

    Unregisters the VM's streams from the selector, clears its proc
    handle, scans its console log for kernel panics, and flags an
    unexpected exit if test cases remain queued while too few VMs are
    still running to absorb them. Returns True if the screen needs a
    refresh.
    """
    updated = False
    for stream in [_vm['proc'].stdout, _vm['proc'].stderr]:
        sel.unregister(stream)
    _vm['proc'] = None
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    log = '{}/{}.srv.{}/console'.format(dir, timestamp, _vm['idx'] + 1)
    with open(log, 'r') as f:
        if "Kernel panic" in f.read():
            scr.addstr("kernel panic")
            logger.info("VM[%d] kernel panic" % _vm['idx'])
            updated = True
    if test_queue:
        # Count VMs that are still running. The original checked
        # _vm['proc'] here, which was just cleared above, so num_vm was
        # always 0 and "unexpected exit" fired whenever the queue was
        # non-empty; it also logged the stale loop variable i instead
        # of this VM's index.
        num_vm = 0
        for i in range(num_servers):
            if vm[i]['proc']:
                num_vm += 1
        if len(test_queue) > num_vm:
            scr.addstr("unexpected exit")
            logger.info("VM[%d] unexpected exit" % _vm['idx'])
            updated = True
    return updated
261
def update_screen(scr, total_tests):
    """Redraw the totals line and the (truncated) failed-test list."""
    done = total_passed + total_failed + total_skipped
    scr.move(num_servers + 1, 10)
    scr.clrtoeol()
    scr.addstr("{} %".format(int(100.0 * done / total_tests)))
    scr.addstr(num_servers + 1, 20,
               "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
    failed = get_failed(vm)
    if failed:
        scr.move(num_servers + 2, 0)
        scr.clrtoeol()
        scr.addstr("Failed test cases: ")
        # Cap the listing at 30 names so the line stays readable.
        for count, name in enumerate(failed, start=1):
            if count > 30:
                scr.addstr('...')
                scr.clrtoeol()
                break
            scr.addstr(name)
            scr.addstr(' ')
    scr.refresh()
283
def show_progress(scr):
    """Run the whole parallel test session and render progress on scr.

    scr is either a curses window or main()'s FakeScreen stand-in.
    Boots the first VM, then loops on the selector: drains VM output,
    dispatches queued tests, reaps exited VMs and starts new ones,
    until no VM process remains running.
    """
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    sel = selectors.DefaultSelector()
    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)
    # Each queue entry is (test name, attempt count); retries requeue
    # with count + 1.
    test_queue = [(t, 0) for t in tests]
    start_vm(vm[0], sel)

    # Static screen layout: header, one line per VM, then totals.
    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        status = "starting VM" if vm[i]['proc'] else "not yet started"
        scr.addstr(i + 1, 10, status)
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    while True:
        updated = False
        events = sel.select(timeout=1)
        for key, mask in events:
            _vm = key.data
            if not _vm['proc']:
                continue
            # Drain stderr both before and after stdout handling so
            # nothing is lost around a state change or exit.
            vm_read_stderr(_vm)
            if vm_read_stdout(_vm, test_queue):
                vm_next_step(_vm, scr, test_queue)
                updated = True
            vm_read_stderr(_vm)
            if _vm['proc'].poll() is not None:
                if vm_terminated(_vm, scr, sel, test_queue):
                    updated = True

        running, run_update = check_vm_start(scr, sel, test_queue)
        if updated or run_update:
            update_screen(scr, total_tests)
        if not running:
            break
    sel.close()

    # Anything with a live proc handle here was never reaped cleanly.
    for i in range(num_servers):
        if not vm[i]['proc']:
            continue
        vm[i]['proc'] = None
        scr.move(i + 1, 10)
        scr.clrtoeol()
        scr.addstr("still running")
        logger.info("VM[%d] still running" % i)

    scr.refresh()
    time.sleep(0.3)
344
def main():
    """Entry point: parse arguments, run the VMs, and report results.

    Builds the test list, launches the parallel run via show_progress()
    (under curses unless --nocurses), then writes the combined log,
    prints failure summaries and reproduction hints, classifies skip
    reasons, optionally produces a code coverage report, and exits with
    0 (all passed), 1 (failures passed on retry) or 2 (real failures).
    """
    import argparse
    import os
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    total_started = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0

    debug_level = logging.INFO
    rerun_failures = True
    timestamp = int(time.time())

    scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))

    p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
    p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
                   help="number of VMs to start")
    p.add_argument('-f', dest='testmodules', metavar='<test module>',
                   help='execute only tests from these test modules',
                   type=str, nargs='+')
    p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
                   help="don't retry failed tests automatically")
    p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
                   help="enable debug logging")
    p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
                   help="enable code coverage collection")
    p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
                   help="shuffle test cases to randomize order")
    p.add_argument('--short', dest='short', action='store_const', const=True,
                   default=False,
                   help="only run short-duration test cases")
    p.add_argument('--long', dest='long', action='store_const', const=True,
                   default=False,
                   help="include long-duration test cases")
    p.add_argument('--valgrind', dest='valgrind', action='store_const',
                   const=True, default=False,
                   help="run tests under valgrind")
    p.add_argument('--telnet', dest='telnet', metavar='<baseport>', type=int,
                   help="enable telnet server inside VMs, specify the base port here")
    p.add_argument('--nocurses', dest='nocurses', action='store_const',
                   const=True, default=False, help="Don't use curses for output")
    p.add_argument('params', nargs='*')
    args = p.parse_args()

    # Log directory; tolerate it already existing.
    dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    num_servers = args.num_servers
    rerun_failures = not args.no_retry
    if args.debug:
        debug_level = logging.DEBUG
    extra_args = []
    if args.valgrind:
        extra_args += ['--valgrind']
    if args.long:
        extra_args += ['--long']
    if args.codecov:
        # Coverage run: build instrumented binaries into a per-run logdir.
        print("Code coverage - build separate binaries")
        logdir = os.path.join(dir, str(timestamp))
        os.makedirs(logdir)
        subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
                               logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False

    first_run_failures = []
    # Test selection: explicit names on the command line, otherwise ask
    # run-tests.py -L for the full (optionally module-filtered) list.
    if args.params:
        tests = args.params
    else:
        tests = []
        cmd = [os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'), '-L']
        if args.testmodules:
            cmd += ["-f"]
            cmd += args.testmodules
        lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        for l in lst.stdout.readlines():
            name = l.decode().split(' ')[0]
            tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    if args.shuffle:
        from random import shuffle
        shuffle(tests)
    elif num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid last part of the test execution running a long
        # duration test case on a single VM while all other VMs have already
        # completed their work.
        for l in long_tests:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
    if args.short:
        tests = [t for t in tests if t not in long_tests]

    # Logging goes to a file under curses (the screen is busy) and to
    # stdout with --nocurses.
    logger.setLevel(debug_level)
    if not args.nocurses:
        log_handler = logging.FileHandler('parallel-vm.log')
    else:
        log_handler = logging.StreamHandler(sys.stdout)
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)

    # Per-VM state table, indexed 0..num_servers-1.
    vm = {}
    for i in range(0, num_servers):
        cmd = [os.path.join(scriptsdir, 'vm-run.sh'),
               '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        if args.telnet:
            cmd += ['--telnet', str(args.telnet + i)]
        vm[i] = {}
        vm[i]['idx'] = i
        vm[i]['starting'] = False
        vm[i]['started'] = False
        vm[i]['cmd'] = cmd
        vm[i]['proc'] = None
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        vm[i]['failed'] = []
        vm[i]['fail_seq'] = []
        vm[i]['skip_reason'] = []
    print('')

    if not args.nocurses:
        curses.wrapper(show_progress)
    else:
        # Minimal no-op stand-in implementing the curses window methods
        # show_progress() uses.
        class FakeScreen:
            def leaveok(self, n):
                pass
            def refresh(self):
                pass
            def addstr(self, *args, **kw):
                pass
            def move(self, x, y):
                pass
            def clrtoeol(self):
                pass
        show_progress(FakeScreen())

    # Combined per-VM output log plus pointers to each failed case's log.
    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i + 1, vm[i]['out'], vm[i]['err']))
        first = True
        for i in range(0, num_servers):
            for line in vm[i]['out'].splitlines():
                if line.startswith("FAIL "):
                    if first:
                        first = False
                        print("Logs for failed test cases:")
                        f.write("Logs for failed test cases:\n")
                    fname = "%s/%d.srv.%d/%s.log" % (dir, timestamp, i + 1,
                                                     line.split(' ')[1])
                    print(fname)
                    f.write("%s\n" % fname)

    failed = get_failed(vm)

    if first_run_failures:
        # Print a vm-run.sh command per VM reproducing (the tail of) the
        # test sequence that led up to its failures; fail_seq holds the
        # names run before the first failure, trimmed to the last 30.
        print("To re-run same failure sequence(s):")
        for i in range(0, num_servers):
            if len(vm[i]['failed']) == 0:
                continue
            print("./vm-run.sh", end=' ')
            if args.long:
                print("--long", end=' ')
            skip = len(vm[i]['fail_seq'])
            skip -= min(skip, 30)
            for t in vm[i]['fail_seq']:
                if skip > 0:
                    skip -= 1
                    continue
                print(t, end=' ')
            print('')
        print("Failed test cases:")
        for f in first_run_failures:
            print(f, end=' ')
            logger.info("Failed: " + f)
        print('')
    # double_failed = cases that failed on retry too (failed minus one
    # occurrence of each first-run failure).
    double_failed = []
    for name in failed:
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if not rerun_failures:
        pass
    elif failed and not double_failed:
        print("All failed cases passed on retry")
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print("Failed even on retry:")
        for f in double_failed:
            print(f, end=' ')
            logger.info("Failed on retry: " + f)
        print('')
    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
                                                    total_passed,
                                                    total_failed,
                                                    total_skipped)
    print(res)
    logger.info(res)
    print("Logs: " + dir + '/' + str(timestamp))
    logger.info("Logs: " + dir + '/' + str(timestamp))

    # Gather skip reasons from VMs that actually started and check each
    # console log for kernel panics.
    skip_reason = []
    for i in range(num_servers):
        if not vm[i]['started']:
            continue
        skip_reason += vm[i]['skip_reason']
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print("Kernel panic in " + log)
                logger.info("Kernel panic in " + log)
    # Map known skip-reason strings to the missing build/runtime feature.
    missing = {}
    missing['OCV not supported'] = 'OCV'
    missing['sigma_dut not available'] = 'sigma_dut'
    missing['Skip test case with long duration due to --long not specified'] = 'long'
    missing['TEST_ALLOC_FAIL not supported' ] = 'TEST_FAIL'
    missing['TEST_ALLOC_FAIL not supported in the build'] = 'TEST_FAIL'
    missing['TEST_FAIL not supported' ] = 'TEST_FAIL'
    missing['veth not supported (kernel CONFIG_VETH)'] = 'KERNEL:CONFIG_VETH'
    missing['WPA-EAP-SUITE-B-192 not supported'] = 'CONFIG_SUITEB192'
    missing['WPA-EAP-SUITE-B not supported'] = 'CONFIG_SUITEB'
    missing['wmediumd not available'] = 'wmediumd'
    missing['DPP not supported'] = 'CONFIG_DPP'
    missing['DPP version 2 not supported'] = 'CONFIG_DPP2'
    missing['EAP method PWD not supported in the build'] = 'CONFIG_EAP_PWD'
    missing['EAP method TEAP not supported in the build'] = 'CONFIG_EAP_TEAP'
    missing['FILS not supported'] = 'CONFIG_FILS'
    missing['FILS-SK-PFS not supported'] = 'CONFIG_FILS_SK_PFS'
    missing['OWE not supported'] = 'CONFIG_OWE'
    missing['SAE not supported'] = 'CONFIG_SAE'
    missing['Not using OpenSSL'] = 'CONFIG_TLS=openssl'
    missing['wpa_supplicant TLS library is not OpenSSL: internal'] = 'CONFIG_TLS=openssl'
    missing_items = []
    other_reasons = []
    for reason in sorted(set(skip_reason)):
        if reason in missing:
            missing_items.append(missing[reason])
        elif reason.startswith('OCSP-multi not supported with this TLS library'):
            missing_items.append('OCSP-MULTI')
        else:
            other_reasons.append(reason)
    if missing_items:
        print("Missing items (SKIP):", missing_items)
    if other_reasons:
        print("Other skip reasons:", other_reasons)

    if codecov:
        # Merge per-VM coverage data and build the combined HTML report.
        print("Code coverage - preparing report")
        for i in range(num_servers):
            subprocess.check_call([os.path.join(scriptsdir,
                                                'process-codecov.sh'),
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
                               logdir])
        print("file://%s/index.html" % logdir)
        logger.info("Code coverage report: file://%s/index.html" % logdir)

    # Exit status: 2 = real failures, 1 = failures that passed on retry,
    # 0 = clean run.
    if double_failed or (failed and not rerun_failures):
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)
638
# Script entry point.
if __name__ == "__main__":
    main()