From: Ralf Wildenhues Date: Thu, 2 Oct 2008 05:48:51 +0000 (+0200) Subject: Implement parallel Autotest test execution: testsuite --jobs. X-Git-Tag: v2.63b~291 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f108ed8925aa69abd56da906ec65c328c15dcec5;p=thirdparty%2Fautoconf.git Implement parallel Autotest test execution: testsuite --jobs. * lib/autotest/general.m4 (AT_JOB_FIFO_FD): New macro. (AT_INIT): <at_jobs>: New variable. Accept -j, -jN, --jobs[=N], document them in --help output. Implement parallel driver loop using a FIFO, enabled with --jobs and if mkfifo works; otherwise, fall back to sequential loop. (AT_SETUP): Store, do not output summary progress line if parallel. * tests/autotest.at (parallel test execution, parallel truth) (parallel fallacy, parallel skip): New tests. * doc/autoconf.texi (testsuite Invocation): Document -j, --jobs, the mkfifo requirement, and that --errexit may cause concurrent jobs to finish. * NEWS: Update. Signed-off-by: Ralf Wildenhues --- diff --git a/NEWS b/NEWS index 0b027a12..c34bedb4 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,9 @@ GNU Autoconf NEWS - User visible changes. ** AC_LANG_ERLANG works once again (regression introduced in 2.61a). +** Autotest testsuites accept an option --jobs[=N] for parallel testing. + + * Major changes in Autoconf 2.63 (2008-09-09) [stable] Released by Eric Blake, based on git versions 2.62.*. diff --git a/doc/autoconf.texi b/doc/autoconf.texi index fa9b6efc..e515a870 100644 --- a/doc/autoconf.texi +++ b/doc/autoconf.texi @@ -20981,6 +20981,23 @@ Change the current directory to @var{dir} before creating any files. Useful for running the testsuite in a subdirectory from a top-level Makefile. +@item --jobs@r{[}=@var{n}@r{]} +@itemx -j@ovar{n} +Run @var{n} tests in parallel, if possible. If @var{n} is not given, +run all given tests in parallel. 
Note that there should be no space +before the argument to @option{-j}, as @option{-j @var{number}} denotes +the separate arguments @option{-j} and @option{@var{number}}, see below. + +In parallel mode, the standard input device of the testsuite script is +not available to commands inside a test group. Furthermore, banner +lines are not printed, and the summary line for each test group is +output after the test group completes. Summary lines may appear +unordered. If verbose and trace output are enabled (see below), they +may appear intermixed from concurrently running tests. + +Parallel mode requires the @command{mkfifo} command to work, and will be +silently disabled otherwise. + @item --clean @itemx -c Remove all the files the test suite might have created and exit. Meant @@ -21058,6 +21075,8 @@ If any test fails, immediately abort testing. It implies @option{--debug}: post test group clean up, and top-level logging are inhibited. This option is meant for the full test suite, it is not really useful for generated debugging scripts. +If the testsuite is run in parallel mode using @option{--jobs}, +then concurrently running tests will finish before exiting. @item --verbose @itemx -v diff --git a/lib/autotest/general.m4 b/lib/autotest/general.m4 index 90fce9fb..4683df49 100644 --- a/lib/autotest/general.m4 +++ b/lib/autotest/general.m4 @@ -217,6 +217,7 @@ m4_foreach([AT_name], [_AT_DEFINE_INIT_LIST], [m4_popdef(m4_defn([AT_name]))]) m4_wrap([_AT_FINISH]) dnl Define FDs. m4_define([AS_MESSAGE_LOG_FD], [5]) +m4_define([AT_JOB_FIFO_FD], [6]) AS_INIT[]dnl m4_divert_push([DEFAULTS])dnl AT_COPYRIGHT( @@ -399,6 +400,8 @@ at_errexit_p=false # Shall we be verbose? ':' means no, empty means yes. at_verbose=: at_quiet= +# Running several jobs in parallel, 0 means as many as test groups. +at_jobs=1 # Shall we keep the debug scripts? Must be `:' when the suite is # run by a debug script, so that the script doesn't remove itself. 
@@ -569,6 +572,22 @@ do at_dir=$at_optarg ;; + # Parallel execution. + --jobs | -j ) + at_jobs=0 + ;; + --jobs=* | -j[[0-9]]* ) + if test -n "$at_optarg"; then + at_jobs=$at_optarg + else + at_jobs=`expr X$at_option : 'X-j\(.*\)'` + fi + case $at_jobs in *[[!0-9]]*) + at_optname=`echo " $at_option" | sed 's/^ //; s/[[0-9=]].*//'` + AS_ERROR([non-numeric argument to $at_optname: $at_jobs]) ;; + esac + ;; + # Keywords. --keywords | -k ) at_prev=--keywords @@ -673,6 +692,8 @@ dnl extra quoting prevents emacs whitespace mode from putting tabs in output Execution tuning: -C, --directory=DIR [ change to directory DIR before starting] + -j, --jobs[[=N]] +[ Allow N jobs at once; infinite jobs with no arg (default 1)] -k, --keywords=KEYWORDS [ select the tests matching all the comma-separated KEYWORDS] [ multiple \`-k' accumulate; prefixed \`!' negates a KEYWORD] @@ -813,6 +834,8 @@ at_suite_log=$at_dir/$as_me.log at_helper_dir=$at_suite_dir/at-groups # Stop file: if it exists, do not start new jobs. at_stop_file=$at_suite_dir/at-stop +# The fifo used for the job dispatcher. +at_job_fifo=$at_suite_dir/at-job-fifo if $at_clean; then test -d "$at_suite_dir" && @@ -993,6 +1016,18 @@ BEGIN { FS="" } AS_ERROR([cannot create test line number cache]) rm -f "$at_suite_dir/at-source-lines" +# Set number of jobs for `-j'; avoid more jobs than test groups. +set X $at_groups; shift; at_max_jobs=$[@%:@] +if test $at_jobs -eq 0 || test $at_jobs -gt $at_max_jobs; then + at_jobs=$at_max_jobs +fi + +# If parallel mode, don't output banners, don't split summary lines. +if test $at_jobs -ne 1; then + at_print_banners=false + at_quiet=: +fi + # Set up helper dirs. rm -rf "$at_helper_dir" && mkdir "$at_helper_dir" && @@ -1101,8 +1136,13 @@ _ATEOF ;; esac echo "$at_res" > "$at_job_dir/$at_res" - # Make sure there is a separator even with long titles. - AS_ECHO([" $at_msg"]) + # In parallel mode, output the summary line only afterwards. 
+ if test $at_jobs -ne 1 && test -n "$at_verbose"; then + AS_ECHO(["$at_desc_line $at_msg"]) + else + # Make sure there is a separator even with long titles. + AS_ECHO([" $at_msg"]) + fi at_log_msg="$at_group. $at_desc ($at_setup_line): $at_msg" case $at_status in 0|77) @@ -1148,20 +1188,74 @@ _ATEOF m4_text_box([Driver loop.]) rm -f "$at_stop_file" +trap 'exit_status=$? + echo "signal received, bailing out" >&2 + echo stop > "$at_stop_file" + exit $exit_status' 1 2 13 15 at_first=: -for at_group in $at_groups; do - at_func_group_prepare - if cd "$at_group_dir" && - at_func_test $at_group && - . "$at_test_source"; then :; else - AS_WARN([unable to parse test group: $at_group]) - at_failed=: +if test $at_jobs -ne 1 && + rm -f "$at_job_fifo" && + ( mkfifo "$at_job_fifo" ) 2>/dev/null && + exec AT_JOB_FIFO_FD<> "$at_job_fifo" +then + # FIFO job dispatcher. + echo + # Turn jobs into a list of numbers, starting from 1. + at_joblist=`AS_ECHO([" $at_groups_all "]) | \ + sed 's/\( '$at_jobs'\) .*/\1/'` + + set X $at_joblist + shift + for at_group in $at_groups; do + ( + # Start one test group. + at_func_group_prepare + if cd "$at_group_dir" && + at_func_test $at_group && + . "$at_test_source" # AT_JOB_FIFO_FD<&- + then :; else + AS_WARN([unable to parse test group: $at_group]) + at_failed=: + fi + at_func_group_postprocess + echo token >&AT_JOB_FIFO_FD + ) & + shift # Consume one token. + if test $[@%:@] -gt 0; then :; else + read at_token <&AT_JOB_FIFO_FD || break + set x $[*] + fi + test -f "$at_stop_file" && break + at_first=false + done + # Read back the remaining ($at_jobs - 1) tokens. + set X $at_joblist + shift + if test $[@%:@] -gt 0; then + shift + for at_job + do + read at_token + done <&AT_JOB_FIFO_FD fi - at_func_group_postprocess - test -f "$at_stop_file" && break - at_first=false -done + exec AT_JOB_FIFO_FD<&- + wait +else + # Run serially, avoid forks and other potential surprises. 
+ for at_group in $at_groups; do + at_func_group_prepare + if cd "$at_group_dir" && + at_func_test $at_group && + . "$at_test_source"; then :; else + AS_WARN([unable to parse test group: $at_group]) + at_failed=: + fi + at_func_group_postprocess + test -f "$at_stop_file" && break + at_first=false + done +fi # Wrap up the test suite with summary statistics. cd "$at_helper_dir" @@ -1530,8 +1624,9 @@ at_setup_line='m4_defn([AT_line])' m4_if(AT_banner_ordinal, [0], [], [at_func_banner AT_banner_ordinal ])dnl at_desc="AS_ESCAPE(m4_dquote(m4_defn([AT_description])))" -$at_quiet AS_ECHO_N([m4_format(["%3d: $at_desc%*s"], AT_ordinal, - m4_max(0, m4_eval(47 - m4_qlen(m4_defn([AT_description])))), [])]) +at_desc_line=m4_format(["%3d: $at_desc%*s"], AT_ordinal, + m4_max(0, m4_eval(47 - m4_qlen(m4_defn([AT_description])))), []) +$at_quiet AS_ECHO_N(["$at_desc_line"]) m4_divert_push([TEST_SCRIPT])dnl ]) diff --git a/tests/autotest.at b/tests/autotest.at index c4c0eda6..a20c4acf 100644 --- a/tests/autotest.at +++ b/tests/autotest.at @@ -773,6 +773,127 @@ AT_CHECK_KEYS([--list -k none -k first], [none|first], [2], [second|both], [0]) AT_CLEANUP +## ----------------------- ## +## parallel test execution ## +## ----------------------- ## + +AT_SETUP([parallel test execution]) + +# This test tries to ensure that -j runs tests in parallel. +# Such a test is inherently racy, because there are no real-time +# guarantees about scheduling delays. So we try to minimize +# the chance to lose the race. + +# The time needed for a micro-suite consisting of NTESTS tests each +# sleeping for a second is estimated by +# startup + ntests * (serial_overhead + 1 / njobs) +# +# in absence of major scheduling delays. 
This leads to side conditions: +# - NTESTS should be high, so the STARTUP time is small compared to the +# test run time, and scheduling delays can even out; it should not be +# too high, to not slow down the testsuite unnecessarily, +# - the number of concurrent jobs NJOBS should not be too low, so the +# race is not lost so easily; it should not be too high, to avoid fork +# failures on tightly limited systems. 4 seems a good compromise +# here, considering that Autotest spawns several other processes. +# - STARTUP is assumed to be the same for parallel and serial runs, so +# the latter can estimate the former. +# - To avoid unportable output from time measurement commands, spawn +# both a parallel and a serial testsuite run; check that the former +# completes before the latter has completed a fraction SERIAL_NTESTS +# of the tests (the serial run is executed in a subdirectory), plus +# some additional time to allow for compensation of SERIAL_OVERHEAD. +# - when adding this time to the serial test execution, an initial delay +# SERIAL_DELAY of the serial test helps to avoid unreliable scheduling +# due to the startup burst of the suites. + +dnl total number of tests. +m4_define([AT_PARALLEL_NTESTS], [16]) +dnl number of jobs to run in parallel. +m4_define([AT_PARALLEL_NJOBS], [4]) +dnl number of tests to run serially, as comparison. +m4_define([AT_PARALLEL_SERIAL_NTESTS], + m4_eval(AT_PARALLEL_NTESTS / AT_PARALLEL_NJOBS)) +dnl initial delay of serial run, to compensate for SERIAL_OVERHEAD. +dnl This corresponds to 0.67 s of overhead per test. 
+m4_define([AT_PARALLEL_SERIAL_DELAY], + m4_eval((AT_PARALLEL_NTESTS - AT_PARALLEL_SERIAL_NTESTS + 1) * 2 / 3)) + + +AT_CHECK_AT_PREP([micro-suite], +[[AT_INIT([suite to test parallel execution]) +m4_for([count], [1], ]]AT_PARALLEL_NTESTS[[, [], + [AT_SETUP([test number count]) + AT_CHECK([sleep 1]) + AT_CLEANUP +]) +]]) + +AT_CHECK([$CONFIG_SHELL ./micro-suite --help | grep " --jobs"], [0], [ignore]) +AT_CHECK([$CONFIG_SHELL ./micro-suite -j2foo], [1], [], [stderr]) +AT_CHECK([grep 'non-numeric argument' stderr], [], [ignore]) +AT_CHECK([$CONFIG_SHELL ./micro-suite --jobs=foo], [1], [], [stderr]) +AT_CHECK([grep 'non-numeric argument' stderr], [], [ignore]) +AT_CHECK([$CONFIG_SHELL ./micro-suite -j[]AT_PARALLEL_NJOBS], [], [stdout]) +# Ensure that all tests run, and lines are not split. +AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [AT_PARALLEL_NTESTS +]) +# Running one test with -j should produce correctly formatted output: +AT_CHECK([$CONFIG_SHELL ./micro-suite -j 3], [], [stdout]) +AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [1 +]) +# Specifying more jobs than tests should not hang: +AT_CHECK([$CONFIG_SHELL ./micro-suite -j3 3], [], [stdout]) +AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [1 +]) + +# The parallel scheduler requires mkfifo to work. +AT_CHECK([mkfifo fifo || exit 77]) +mkdir serial + +# Unfortunately, the return value of wait is unreliable, +# so we check that kill fails. +AT_CHECK([$CONFIG_SHELL ./micro-suite --jobs=[]AT_PARALLEL_NJOBS & ]dnl + [sleep AT_PARALLEL_SERIAL_DELAY && ]dnl + [cd serial && $CONFIG_SHELL ../micro-suite -AT_PARALLEL_SERIAL_NTESTS >/dev/null && ]dnl + [{ kill $! 
&& exit 1; :; }], [], [stdout], [ignore]) +AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [AT_PARALLEL_NTESTS +]) +AT_CHECK([grep 'AT_PARALLEL_NTESTS tests' stdout], [], [ignore]) + +AT_CLEANUP + +AT_CHECK_AT_TEST([parallel truth], + [AT_CHECK([:], 0, [], [])], + [], [], [], [], + [], [-j]) + +AT_CHECK_AT_TEST([parallel fallacy], + [AT_CHECK([false], [], [], [])], + [], [1], [], [ignore], + [AT_CHECK([grep failed micro-suite.log], [], [ignore])], [-j]) + +AT_CHECK_AT_TEST([parallel skip], + [AT_CHECK([exit 77], 0, [], [])], + [], [], [], [], + [AT_CHECK([grep skipped micro-suite.log], [], [ignore])], [-j]) + +AT_CHECK_AT_TEST([parallel errexit], + [AT_CHECK([false]) + AT_CLEANUP + AT_SETUP([barrier test]) + AT_CHECK([sleep 4]) + AT_CLEANUP + AT_SETUP([test that should not be run]) + AT_CHECK([:])], + [], [1], [stdout], [stderr], + [AT_CHECK([test -f micro-suite.log], [1]) + touch micro-suite.log # shut up AT_CAPTURE_FILE. + AT_CHECK([grep "should not be run" stdout], [1]) + AT_CHECK([grep "[[12]] .* inhibited subsequent" stderr], [], [ignore])], + [-j2 --errexit]) + + ## ------------------- ## ## srcdir propagation. ## ## ------------------- ##