From: Jouni Malinen Date: Wed, 18 Dec 2024 10:43:32 +0000 (+0200) Subject: tests: Terminate UML VM automatically if it seems to have stopped X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3e7e6b93589f349913668e0ea4cf0efe000080a4;p=thirdparty%2Fhostap.git tests: Terminate UML VM automatically if it seems to have stopped There have been a number of cases in which a UML VM seems to hang. Make parallel-vm.py track how long it has been since the last stdout input from a VM, show a note on the status screen if there have been no updates in ten seconds, and terminate the VM using uml_mconsole if there have been no updates in 120 seconds. This is in use only with UML, i.e., only if time-travel is enabled, so 120 seconds of real calendar time should be enough time for any test case to be completed. Signed-off-by: Jouni Malinen --- diff --git a/tests/hwsim/vm/parallel-vm.py b/tests/hwsim/vm/parallel-vm.py index 2a5f9b5ed..83859b356 100755 --- a/tests/hwsim/vm/parallel-vm.py +++ b/tests/hwsim/vm/parallel-vm.py @@ -13,6 +13,7 @@ import logging import multiprocessing import os import selectors +import shutil import subprocess import sys import time @@ -123,6 +124,7 @@ def vm_read_stdout(vm, test_queue): if e.errno == errno.EAGAIN: return False raise + vm['last_stdout'] = time.time() logger.debug("VM[%d] stdout.read[%s]" % (vm['idx'], out.rstrip())) pending = vm['pending'] + out lines = [] @@ -350,6 +352,12 @@ def update_screen(scr, total_tests): scr.clrtoeol() scr.refresh() +def has_uml_mconsole(_vm): + if not shutil.which('uml_mconsole'): + return False + dir = os.path.join(os.path.expanduser('~/.uml'), 'hwsim-' + _vm['DATE']) + return os.path.exists(dir) + def show_progress(scr): global num_servers global vm @@ -401,6 +409,39 @@ def show_progress(scr): update_screen(scr, total_tests) if not running: break + + status_line = num_servers + if status_line >= max_y: + status_line = max_y - 1 + max_y, max_x = scr.getmaxyx() + updated = False + + now = time.time() + for i in range(num_servers): + _vm = vm[i] + if not _vm['proc']: + continue + 
last = _vm['last_stdout'] + if last and now - last > 10: + if _vm['idx'] < status_line: + scr.move(_vm['idx'], max_x - 25) + scr.clrtoeol() + scr.addstr("(no update in %d s)" % (now - last)) + updated = True + if has_uml_mconsole(_vm) and last and now - last > 120: + if _vm['idx'] < status_line: + scr.move(_vm['idx'], 10) + scr.clrtoeol() + scr.addstr("terminating due to no updates received") + logger.info("Kill hung VM[%d]" % _vm['idx']) + subprocess.call(['uml_mconsole', 'hwsim-' + _vm['DATE'], + 'log', 'Halting due to no progress'], + stdout=open('/dev/null', 'w')) + subprocess.call(['uml_mconsole', 'hwsim-' + _vm['DATE'], + 'halt'], stdout=open('/dev/null', 'w')) + if updated: + scr.refresh() + sel.close() for i in range(num_servers): @@ -577,6 +618,7 @@ def main(): cmd += ['--telnet', str(args.telnet + i)] vm[i] = {} vm[i]['idx'] = i + vm[i]['DATE'] = str(timestamp) + '.srv.' + str(i + 1) vm[i]['starting'] = False vm[i]['started'] = False vm[i]['cmd'] = cmd @@ -588,6 +630,7 @@ def main(): vm[i]['fail_seq'] = [] vm[i]['skip_reason'] = [] vm[i]['current_name'] = None + vm[i]['last_stdout'] = None print('') if not args.nocurses: diff --git a/tests/hwsim/vm/vm-run.sh b/tests/hwsim/vm/vm-run.sh index e891676a4..3642690af 100755 --- a/tests/hwsim/vm/vm-run.sh +++ b/tests/hwsim/vm/vm-run.sh @@ -168,6 +168,7 @@ A+="ro" if [ -z $KVM ]; then UML_ARGS="mem=${MEMORY}M \ LOGDIR=$LOGDIR \ + umid=hwsim-$DATE \ time-travel=inf-cpu \ $A \ root=none hostfs=/ rootfstype=hostfs rootflags=/ \