Implement parallel Autotest test execution: testsuite --jobs.

author Ralf Wildenhues <Ralf.Wildenhues@gmx.de>

Thu, 2 Oct 2008 05:48:51 +0000 (07:48 +0200)

committer Ralf Wildenhues <Ralf.Wildenhues@gmx.de>

Thu, 2 Oct 2008 05:52:25 +0000 (07:52 +0200)
author Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
Thu, 2 Oct 2008 05:48:51 +0000 (07:48 +0200)
committer Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
Thu, 2 Oct 2008 05:52:25 +0000 (07:52 +0200)
diff --git a/NEWS b/NEWS

index 0b027a12fc7654609d2761ebf8a08bea8f688155..c34bedb4dcd1abf4f207765b47d9f2ba505f2c72 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,9 @@ GNU Autoconf NEWS - User visible changes.
  
  ** AC_LANG_ERLANG works once again (regression introduced in 2.61a).
  
+** Autotest testsuites accept an option --jobs[=N] for parallel testing.
+
+\f
  * Major changes in Autoconf 2.63 (2008-09-09) [stable]
    Released by Eric Blake, based on git versions 2.62.*.
  
diff --git a/doc/autoconf.texi b/doc/autoconf.texi

index fa9b6efc4d4fe5fbd691438754b59a09ed377d0a..e515a8705908041064aa4616d36b66c147668ffb 100644 (file)
--- a/doc/autoconf.texi
+++ b/doc/autoconf.texi
@@ -20981,6 +20981,23 @@ Change the current directory to @var{dir} before creating any files.
  Useful for running the testsuite in a subdirectory from a top-level
  Makefile.
  
+@item --jobs@r{[}=@var{n}@r{]}
+@itemx -j@ovar{n}
+Run @var{n} tests in parallel, if possible.  If @var{n} is not given,
+run all given tests in parallel.  Note that there should be no space
+before the argument to @option{-j}, as @option{-j @var{number}} denotes
+the separate arguments @option{-j} and @option{@var{number}}; see below.
+
+In parallel mode, the standard input device of the testsuite script is
+not available to commands inside a test group.  Furthermore, banner
+lines are not printed, and the summary line for each test group is
+output after the test group completes.  Summary lines may appear
+unordered.  If verbose and trace output are enabled (see below), they
+may appear intermixed from concurrently running tests.
+
+Parallel mode requires the @command{mkfifo} command to work, and will be
+silently disabled otherwise.
+
  @item --clean
  @itemx -c
  Remove all the files the test suite might have created and exit.  Meant
@@ -21058,6 +21075,8 @@ If any test fails, immediately abort testing.  It implies
  @option{--debug}: post test group clean up, and top-level logging
  are inhibited.  This option is meant for the full test
  suite, it is not really useful for generated debugging scripts.
+If the testsuite is run in parallel mode using @option{--jobs},
+then concurrently running tests will finish before the testsuite exits.
  
  @item --verbose
  @itemx -v
diff --git a/lib/autotest/general.m4 b/lib/autotest/general.m4

index 90fce9fb72de2c9bdeb6c9121d4166c961a319d9..4683df492c1fe4f225f08921eefa021c4e60d0a7 100644 (file)
--- a/lib/autotest/general.m4
+++ b/lib/autotest/general.m4
@@ -217,6 +217,7 @@ m4_foreach([AT_name], [_AT_DEFINE_INIT_LIST], [m4_popdef(m4_defn([AT_name]))])
  m4_wrap([_AT_FINISH])
  dnl Define FDs.
  m4_define([AS_MESSAGE_LOG_FD], [5])
+m4_define([AT_JOB_FIFO_FD], [6])
  AS_INIT[]dnl
  m4_divert_push([DEFAULTS])dnl
  AT_COPYRIGHT(
@@ -399,6 +400,8 @@ at_errexit_p=false
  # Shall we be verbose?  ':' means no, empty means yes.
  at_verbose=:
  at_quiet=
+# Number of jobs to run in parallel; 0 means as many as test groups.
+at_jobs=1
  
  # Shall we keep the debug scripts?  Must be `:' when the suite is
  # run by a debug script, so that the script doesn't remove itself.
@@ -569,6 +572,22 @@ do
         at_dir=$at_optarg
         ;;
  
+    # Parallel execution.
+    --jobs | -j )
+       at_jobs=0
+       ;;
+    --jobs=* | -j[[0-9]]* )
+       if test -n "$at_optarg"; then
+         at_jobs=$at_optarg
+       else
+         at_jobs=`expr X$at_option : 'X-j\(.*\)'`
+       fi
+       case $at_jobs in *[[!0-9]]*)
+         at_optname=`echo " $at_option" | sed 's/^ //; s/[[0-9=]].*//'`
+         AS_ERROR([non-numeric argument to $at_optname: $at_jobs]) ;;
+       esac
+       ;;
+
      # Keywords.
      --keywords | -k )
         at_prev=--keywords
@@ -673,6 +692,8 @@ dnl extra quoting prevents emacs whitespace mode from putting tabs in output
  Execution tuning:
    -C, --directory=DIR
  [                 change to directory DIR before starting]
+  -j, --jobs[[=N]]
+[                 Allow N jobs at once; infinite jobs with no arg (default 1)]
    -k, --keywords=KEYWORDS
  [                 select the tests matching all the comma-separated KEYWORDS]
  [                 multiple \`-k' accumulate; prefixed \`!' negates a KEYWORD]
@@ -813,6 +834,8 @@ at_suite_log=$at_dir/$as_me.log
  at_helper_dir=$at_suite_dir/at-groups
  # Stop file: if it exists, do not start new jobs.
  at_stop_file=$at_suite_dir/at-stop
+# The fifo used for the job dispatcher.
+at_job_fifo=$at_suite_dir/at-job-fifo
  
  if $at_clean; then
    test -d "$at_suite_dir" &&
@@ -993,6 +1016,18 @@ BEGIN { FS="\a" }
    AS_ERROR([cannot create test line number cache])
  rm -f "$at_suite_dir/at-source-lines"
  
+# Set number of jobs for `-j'; avoid more jobs than test groups.
+set X $at_groups; shift; at_max_jobs=$[@%:@]
+if test $at_jobs -eq 0 || test $at_jobs -gt $at_max_jobs; then
+  at_jobs=$at_max_jobs
+fi
+
+# In parallel mode, don't output banners, don't split summary lines.
+if test $at_jobs -ne 1; then
+  at_print_banners=false
+  at_quiet=:
+fi
+
  # Set up helper dirs.
  rm -rf "$at_helper_dir" &&
  mkdir "$at_helper_dir" &&
@@ -1101,8 +1136,13 @@ _ATEOF
         ;;
    esac
    echo "$at_res" > "$at_job_dir/$at_res"
-  # Make sure there is a separator even with long titles.
-  AS_ECHO([" $at_msg"])
+  # In parallel mode, output the summary line only afterwards.
+  if test $at_jobs -ne 1 && test -n "$at_verbose"; then
+    AS_ECHO(["$at_desc_line $at_msg"])
+  else
+    # Make sure there is a separator even with long titles.
+    AS_ECHO([" $at_msg"])
+  fi
    at_log_msg="$at_group. $at_desc ($at_setup_line): $at_msg"
    case $at_status in
      0|77)
@@ -1148,20 +1188,74 @@ _ATEOF
  m4_text_box([Driver loop.])
  
  rm -f "$at_stop_file"
+trap 'exit_status=$?
+  echo "signal received, bailing out" >&2
+  echo stop > "$at_stop_file"
+  exit $exit_status' 1 2 13 15
  at_first=:
  
-for at_group in $at_groups; do
-  at_func_group_prepare
-  if cd "$at_group_dir" &&
-     at_func_test $at_group &&
-     . "$at_test_source"; then :; else
-    AS_WARN([unable to parse test group: $at_group])
-    at_failed=:
+if test $at_jobs -ne 1 &&
+     rm -f "$at_job_fifo" &&
+     ( mkfifo "$at_job_fifo" ) 2>/dev/null &&
+     exec AT_JOB_FIFO_FD<> "$at_job_fifo"
+then
+  # FIFO job dispatcher.
+  echo
+  # Turn jobs into a list of numbers, starting from 1.
+  at_joblist=`AS_ECHO([" $at_groups_all "]) | \
+    sed 's/\( '$at_jobs'\) .*/\1/'`
+
+  set X $at_joblist
+  shift
+  for at_group in $at_groups; do
+    (
+      # Start one test group.
+      at_func_group_prepare
+      if cd "$at_group_dir" &&
+        at_func_test $at_group &&
+        . "$at_test_source" # AT_JOB_FIFO_FD<&-
+      then :; else
+       AS_WARN([unable to parse test group: $at_group])
+       at_failed=:
+      fi
+      at_func_group_postprocess
+      echo token >&AT_JOB_FIFO_FD
+    ) &
+    shift # Consume one token.
+    if test $[@%:@] -gt 0; then :; else
+      read at_token <&AT_JOB_FIFO_FD || break
+      set x $[*]
+    fi
+    test -f "$at_stop_file" && break
+    at_first=false
+  done
+  # Read back the remaining ($at_jobs - 1) tokens.
+  set X $at_joblist
+  shift
+  if test $[@%:@] -gt 0; then
+    shift
+    for at_job
+    do
+      read at_token
+    done <&AT_JOB_FIFO_FD
    fi
-  at_func_group_postprocess
-  test -f "$at_stop_file" && break
-  at_first=false
-done
+  exec AT_JOB_FIFO_FD<&-
+  wait
+else
+  # Run serially, avoid forks and other potential surprises.
+  for at_group in $at_groups; do
+    at_func_group_prepare
+    if cd "$at_group_dir" &&
+       at_func_test $at_group &&
+       . "$at_test_source"; then :; else
+      AS_WARN([unable to parse test group: $at_group])
+      at_failed=:
+    fi
+    at_func_group_postprocess
+    test -f "$at_stop_file" && break
+    at_first=false
+  done
+fi
  
  # Wrap up the test suite with summary statistics.
  cd "$at_helper_dir"
@@ -1530,8 +1624,9 @@ at_setup_line='m4_defn([AT_line])'
  m4_if(AT_banner_ordinal, [0], [], [at_func_banner AT_banner_ordinal
  ])dnl
  at_desc="AS_ESCAPE(m4_dquote(m4_defn([AT_description])))"
-$at_quiet AS_ECHO_N([m4_format(["%3d: $at_desc%*s"], AT_ordinal,
-  m4_max(0, m4_eval(47 - m4_qlen(m4_defn([AT_description])))), [])])
+at_desc_line=m4_format(["%3d: $at_desc%*s"], AT_ordinal,
+  m4_max(0, m4_eval(47 - m4_qlen(m4_defn([AT_description])))), [])
+$at_quiet AS_ECHO_N(["$at_desc_line"])
  m4_divert_push([TEST_SCRIPT])dnl
  ])
  
diff --git a/tests/autotest.at b/tests/autotest.at

index c4c0eda6d3245e0071dce496b5e462a6e40fb0c5..a20c4acf551df823e70e4d19ddc393c97bbff0ed 100644 (file)
--- a/tests/autotest.at
+++ b/tests/autotest.at
@@ -773,6 +773,127 @@ AT_CHECK_KEYS([--list -k none -k first], [none|first], [2], [second|both], [0])
  AT_CLEANUP
  
  
+## ----------------------- ##
+## parallel test execution ##
+## ----------------------- ##
+
+AT_SETUP([parallel test execution])
+
+# This test tries to ensure that -j runs tests in parallel.
+# Such a test is inherently racy, because there are no real-time
+# guarantees about scheduling delays.  So we try to minimize
+# the chance to lose the race.
+
+# The time needed for a micro-suite consisting of NTESTS tests each
+# sleeping for a second is estimated by
+#   startup + ntests * (serial_overhead + 1 / njobs)
+#
+# in absence of major scheduling delays.  This leads to side conditions:
+# - NTESTS should be high, so the STARTUP time is small compared to the
+#   test run time, and scheduling delays can even out; it should not be
+#   too high, to not slow down the testsuite unnecessarily,
+# - the number of concurrent jobs NJOBS should not be too low, so the
+#   race is not lost so easily; it should not be too high, to avoid fork
+#   failures on tightly limited systems.  4 seems a good compromise
+#   here, considering that Autotest spawns several other processes.
+# - STARTUP is assumed to be the same for parallel and serial runs, so
+#   the latter can estimate the former.
+# - To avoid unportable output from time measurement commands, spawn
+#   both a parallel and a serial testsuite run; check that the former
+#   completes before the latter has completed a fraction SERIAL_NTESTS
+#   of the tests (the serial run is executed in a subdirectory), plus
+#   some additional time to allow for compensation of SERIAL_OVERHEAD.
+# - when adding this time to the serial test execution, an initial delay
+#   SERIAL_DELAY of the serial test helps to avoid unreliable scheduling
+#   due to the startup burst of the suites.
+
+dnl total number of tests.
+m4_define([AT_PARALLEL_NTESTS], [16])
+dnl number of jobs to run in parallel.
+m4_define([AT_PARALLEL_NJOBS], [4])
+dnl number of tests to run serially, as comparison.
+m4_define([AT_PARALLEL_SERIAL_NTESTS],
+  m4_eval(AT_PARALLEL_NTESTS / AT_PARALLEL_NJOBS))
+dnl initial delay of serial run, to compensate for SERIAL_OVERHEAD.
+dnl This corresponds to 0.67 s of overhead per test.
+m4_define([AT_PARALLEL_SERIAL_DELAY],
+  m4_eval((AT_PARALLEL_NTESTS - AT_PARALLEL_SERIAL_NTESTS + 1) * 2 / 3))
+
+
+AT_CHECK_AT_PREP([micro-suite],
+[[AT_INIT([suite to test parallel execution])
+m4_for([count], [1], ]]AT_PARALLEL_NTESTS[[, [],
+   [AT_SETUP([test number count])
+    AT_CHECK([sleep 1])
+    AT_CLEANUP
+])
+]])
+
+AT_CHECK([$CONFIG_SHELL ./micro-suite --help | grep " --jobs"], [0], [ignore])
+AT_CHECK([$CONFIG_SHELL ./micro-suite -j2foo], [1], [], [stderr])
+AT_CHECK([grep 'non-numeric argument' stderr], [], [ignore])
+AT_CHECK([$CONFIG_SHELL ./micro-suite --jobs=foo], [1], [], [stderr])
+AT_CHECK([grep 'non-numeric argument' stderr], [], [ignore])
+AT_CHECK([$CONFIG_SHELL ./micro-suite -j[]AT_PARALLEL_NJOBS], [], [stdout])
+# Ensure that all tests run, and lines are not split.
+AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [AT_PARALLEL_NTESTS
+])
+# Running one test with -j should produce correctly formatted output:
+AT_CHECK([$CONFIG_SHELL ./micro-suite -j 3], [], [stdout])
+AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [1
+])
+# Specifying more jobs than tests should not hang:
+AT_CHECK([$CONFIG_SHELL ./micro-suite -j3 3], [], [stdout])
+AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [1
+])
+
+# The parallel scheduler requires mkfifo to work.
+AT_CHECK([mkfifo fifo || exit 77])
+mkdir serial
+
+# Unfortunately, the return value of wait is unreliable,
+# so we check that kill fails.
+AT_CHECK([$CONFIG_SHELL ./micro-suite --jobs=[]AT_PARALLEL_NJOBS & ]dnl
+         [sleep AT_PARALLEL_SERIAL_DELAY && ]dnl
+         [cd serial && $CONFIG_SHELL ../micro-suite -AT_PARALLEL_SERIAL_NTESTS >/dev/null && ]dnl
+         [{ kill $! && exit 1; :; }], [], [stdout], [ignore])
+AT_CHECK([grep -c '^.\{53\}ok' stdout], [], [AT_PARALLEL_NTESTS
+])
+AT_CHECK([grep 'AT_PARALLEL_NTESTS tests' stdout], [], [ignore])
+
+AT_CLEANUP
+
+AT_CHECK_AT_TEST([parallel truth],
+  [AT_CHECK([:], 0, [], [])],
+  [], [], [], [],
+  [], [-j])
+
+AT_CHECK_AT_TEST([parallel fallacy],
+  [AT_CHECK([false], [], [], [])],
+  [], [1], [], [ignore],
+  [AT_CHECK([grep failed micro-suite.log], [], [ignore])], [-j])
+
+AT_CHECK_AT_TEST([parallel skip],
+  [AT_CHECK([exit 77], 0, [], [])],
+  [], [], [], [],
+  [AT_CHECK([grep skipped micro-suite.log], [], [ignore])], [-j])
+
+AT_CHECK_AT_TEST([parallel errexit],
+  [AT_CHECK([false])
+   AT_CLEANUP
+   AT_SETUP([barrier test])
+   AT_CHECK([sleep 4])
+   AT_CLEANUP
+   AT_SETUP([test that should not be run])
+   AT_CHECK([:])],
+  [], [1], [stdout], [stderr],
+  [AT_CHECK([test -f micro-suite.log], [1])
+   touch micro-suite.log # shut up AT_CAPTURE_FILE.
+   AT_CHECK([grep "should not be run" stdout], [1])
+   AT_CHECK([grep "[[12]] .* inhibited subsequent" stderr], [], [ignore])],
+  [-j2 --errexit])
+
+
  ## ------------------- ##
  ## srcdir propagation. ##
  ## ------------------- ##
author	Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
	Thu, 2 Oct 2008 05:48:51 +0000 (07:48 +0200)
committer	Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
	Thu, 2 Oct 2008 05:52:25 +0000 (07:52 +0200)
NEWS		patch \| blob \| blame \| history
doc/autoconf.texi		patch \| blob \| blame \| history
lib/autotest/general.m4		patch \| blob \| blame \| history
tests/autotest.at		patch \| blob \| blame \| history