tests/hwsim/vm/parallel-vm.py (thirdparty/hostap.git)
tests: Add --valgrind argument to parallel-vm.py
#!/usr/bin/env python2
#
# Parallel VM test case executor
# Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
#
# This software may be distributed under the terms of the BSD license.
# See README for more details.

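# Example invocation (illustrative, not from this file): start eight parallel
# VMs, include the long-duration test cases, and enable debug logging to
# parallel-vm.log:
#
#   ./parallel-vm.py 8 --long --debug
#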
import curses
import fcntl
import logging
import os
import subprocess
import sys
import time

logger = logging.getLogger()

# Test cases that take significantly longer than average to execute.
long_tests = [ "ap_roam_open",
               "wpas_mesh_password_mismatch_retry",
               "wpas_mesh_password_mismatch",
               "hostapd_oom_wpa2_psk_connect",
               "ap_hs20_fetch_osu_stop",
               "ap_roam_wpa2_psk",
               "ibss_wpa_none_ccmp",
               "nfc_wps_er_handover_pk_hash_mismatch_sta",
               "go_neg_peers_force_diff_freq",
               "p2p_cli_invite",
               "sta_ap_scan_2b",
               "ap_pmf_sta_unprot_deauth_burst",
               "ap_bss_add_remove_during_ht_scan",
               "wext_scan_hidden",
               "autoscan_exponential",
               "nfc_p2p_client",
               "wnm_bss_keep_alive",
               "ap_inactivity_disconnect",
               "scan_bss_expiration_age",
               "autoscan_periodic",
               "discovery_group_client",
               "concurrent_p2pcli",
               "ap_bss_add_remove",
               "wpas_ap_wps",
               "wext_pmksa_cache",
               "ibss_wpa_none",
               "ap_ht_40mhz_intolerant_ap",
               "ibss_rsn",
               "discovery_pd_retries",
               "ap_wps_setup_locked_timeout",
               "ap_vht160",
               "dfs_radar",
               "dfs",
               "grpform_cred_ready_timeout",
               "hostapd_oom_wpa2_eap_connect",
               "wpas_ap_dfs",
               "autogo_many",
               "hostapd_oom_wpa2_eap",
               "ibss_open",
               "proxyarp_open_ebtables",
               "radius_failover",
               "obss_scan_40_intolerant",
               "dbus_connect_oom",
               "proxyarp_open",
               "ap_wps_iteration",
               "ap_wps_pbc_timeout" ]

def get_failed(vm):
    # Collect the names of failed test cases from all VMs (num_servers is a
    # global set in main()).
    failed = []
    for i in range(num_servers):
        failed += vm[i]['failed']
    return failed

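# Each VM runs run-tests.py, which reports progress over stdout one line at a
# time ("READY", "START <name>", "PASS <name>", "FAIL <name>", "SKIP <name>",
# "NOT-FOUND"). The pipes are switched to non-blocking mode in main(), so
# vm_read_stdout() parses only complete lines and carries any partial line
# over in vm['pending'].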
def vm_read_stdout(vm, i):
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    ready = False
    try:
        out = vm['proc'].stdout.read()
    except:
        # The non-blocking read raises when no output is buffered yet.
        return False
    logger.debug("VM[%d] stdout.read[%s]" % (i, out))
    pending = vm['pending'] + out
    lines = []
    while True:
        pos = pending.find('\n')
        if pos < 0:
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (i, line))
        if line.startswith("READY"):
            ready = True
        elif line.startswith("PASS"):
            ready = True
            total_passed += 1
        elif line.startswith("FAIL"):
            ready = True
            total_failed += 1
            name = line.split(' ')[1]
            logger.debug("VM[%d] test case failed: %s" % (i, name))
            vm['failed'].append(name)
        elif line.startswith("NOT-FOUND"):
            ready = True
            total_failed += 1
            logger.info("VM[%d] test case not found" % i)
        elif line.startswith("SKIP"):
            ready = True
            total_skipped += 1
        elif line.startswith("START"):
            total_started += 1
        vm['out'] += line + '\n'
        lines.append(line)
    vm['pending'] = pending
    return ready

def show_progress(scr):
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped

    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)

    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        scr.addstr(i + 1, 10, "starting VM")
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    completed_first_pass = False
    rerun_tests = []

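    # Two-pass scheduling: the first pass pops tests from the shared queue and
    # feeds them to whichever VM reports ready; once every VM has finished the
    # first round, any failed cases are (optionally) queued once more for a
    # retry pass before the VMs are told to shut down.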
    while True:
        running = False
        first_running = False
        updated = False

        for i in range(0, num_servers):
            if completed_first_pass:
                continue
            if vm[i]['first_run_done']:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("unexpected exit")
                        logger.info("VM[%d] unexpected exit" % i)
                updated = True
                continue

            running = True
            first_running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass

            if vm_read_stdout(vm[i], i):
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not tests:
                    vm[i]['first_run_done'] = True
                    scr.addstr("completed first round")
                    logger.info("VM[%d] completed first round" % i)
                    continue
                else:
                    name = tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name)
                    logger.debug("VM[%d] start test %s" % (i, name))

        if not first_running and not completed_first_pass:
            logger.info("First round of testing completed")
            if tests:
                logger.info("Unexpected test cases remaining from first round: " + str(tests))
                raise Exception("Unexpected test cases remaining from first round")
            completed_first_pass = True
            for name in get_failed(vm):
                if rerun_failures:
                    rerun_tests.append(name)
                first_run_failures.append(name)

        for i in range(num_servers):
            if not completed_first_pass:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("completed run")
                        logger.info("VM[%d] completed run" % i)
                updated = True
                continue

            running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass

            ready = False
            if vm[i]['first_run_done']:
                vm[i]['first_run_done'] = False
                ready = True
            else:
                ready = vm_read_stdout(vm[i], i)
            if ready:
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not rerun_tests:
                    vm[i]['proc'].stdin.write('\n')
                    scr.addstr("shutting down")
                    logger.info("VM[%d] shutting down" % i)
                else:
                    name = rerun_tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name + "(*)")
                    logger.debug("VM[%d] start test %s (*)" % (i, name))

        if not running:
            break

        if updated:
            scr.move(num_servers + 1, 10)
            scr.clrtoeol()
            scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
            scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
            failed = get_failed(vm)
            if len(failed) > 0:
                scr.move(num_servers + 2, 0)
                scr.clrtoeol()
                scr.addstr("Failed test cases: ")
                count = 0
                for f in failed:
                    count += 1
                    if count > 30:
                        scr.addstr('...')
                        scr.clrtoeol()
                        break
                    scr.addstr(f)
                    scr.addstr(' ')

            scr.move(0, 35)
            scr.clrtoeol()
            if rerun_tests:
                scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
            elif rerun_failures:
                pass
            elif first_run_failures:
                scr.addstr("(RETRY FAILED)")

            scr.refresh()

        time.sleep(0.25)

    scr.refresh()
    time.sleep(0.3)

def main():
    import argparse
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    total_started = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0

    debug_level = logging.INFO
    rerun_failures = True
    timestamp = int(time.time())

    scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))

    p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
    p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
                   help="number of VMs to start")
    p.add_argument('-f', dest='testmodules', metavar='<test module>',
                   help='execute only tests from these test modules',
                   type=str, nargs='+')
    p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
                   help="don't retry failed tests automatically")
    p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
                   help="enable debug logging")
    p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
                   help="enable code coverage collection")
    p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
                   help="shuffle test cases to randomize order")
    p.add_argument('--short', dest='short', action='store_const', const=True,
                   default=False,
                   help="only run short-duration test cases")
    p.add_argument('--long', dest='long', action='store_const', const=True,
                   default=False,
                   help="include long-duration test cases")
    p.add_argument('--valgrind', dest='valgrind', action='store_const',
                   const=True, default=False,
                   help="run tests under valgrind")
    p.add_argument('params', nargs='*')
    args = p.parse_args()
    num_servers = args.num_servers
    rerun_failures = not args.no_retry
    if args.debug:
        debug_level = logging.DEBUG
    extra_args = []
    if args.valgrind:
        extra_args += [ '--valgrind' ]
    if args.long:
        extra_args += [ '--long' ]
    if args.codecov:
        print "Code coverage - build separate binaries"
        logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
        os.makedirs(logdir)
        subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
                               logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False

    first_run_failures = []
    tests = []
    # Ask run-tests.py for the list of available test case names (-L).
    cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'), '-L' ]
    if args.testmodules:
        cmd += [ "-f" ]
        cmd += args.testmodules
    lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    for l in lst.stdout.readlines():
        name = l.split(' ')[0]
        tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    dir = '/tmp/hwsim-test-logs'
    try:
        os.mkdir(dir)
    except:
        pass

    if args.shuffle:
        from random import shuffle
        shuffle(tests)
    elif num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid the tail of the test run executing a
        # long-duration test case on a single VM while all other VMs have
        # already completed their work.
        for l in long_tests:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
    if args.short:
        tests = [t for t in tests if t not in long_tests]

    logger.setLevel(debug_level)
    log_handler = logging.FileHandler('parallel-vm.log')
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)

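    # Launch one vm-run.sh child per VM. The flag semantics are defined by
    # vm-run.sh itself; --delay presumably staggers VM startup, --ext names
    # each VM's log directory (<timestamp>.srv.<n>, as read back below), and
    # -i appears to select the interactive mode that accepts test names over
    # stdin.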
    vm = {}
    for i in range(0, num_servers):
        print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
        logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
        cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
               '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        vm[i] = {}
        vm[i]['first_run_done'] = False
        vm[i]['proc'] = subprocess.Popen(cmd,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        vm[i]['failed'] = []
        # Make the pipes non-blocking so the status loop can poll them.
        for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
            fd = stream.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    print

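    # curses.wrapper() initializes the screen, runs show_progress(), and
    # restores the terminal even if the status loop raises an exception.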
    curses.wrapper(show_progress)

    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))

    failed = get_failed(vm)

    if first_run_failures:
        print "Failed test cases:"
        for f in first_run_failures:
            print f,
            logger.info("Failed: " + f)
        print
    double_failed = []
    for name in failed:
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if not rerun_failures:
        pass
    elif failed and not double_failed:
        print "All failed cases passed on retry"
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print "Failed even on retry:"
        for f in double_failed:
            print f,
            logger.info("Failed on retry: " + f)
        print
    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
                                                    total_passed,
                                                    total_failed,
                                                    total_skipped)
    print(res)
    logger.info(res)
    print "Logs: " + dir + '/' + str(timestamp)
    logger.info("Logs: " + dir + '/' + str(timestamp))

    for i in range(0, num_servers):
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print "Kernel panic in " + log
                logger.info("Kernel panic in " + log)

    if codecov:
        print "Code coverage - preparing report"
        for i in range(num_servers):
            subprocess.check_call([os.path.join(scriptsdir,
                                                'process-codecov.sh'),
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
                               logdir])
        print "file://%s/index.html" % logdir
        logger.info("Code coverage report: file://%s/index.html" % logdir)

    # Exit status: 2 = failures remained (or retries were disabled and
    # something failed), 1 = first-run failures that passed on retry,
    # 0 = clean run.
    if double_failed or (failed and not rerun_failures):
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)

if __name__ == "__main__":
    main()