# Async call runner_clearlocks
# Called by controller
sub runnerac_clearlocks {
- controlleripccall(\&runner_clearlocks, @_);
+ return controlleripccall(\&runner_clearlocks, @_);
}
# Async call runner_shutdown
# Called by controller
sub runnerac_shutdown {
my ($runnerid)=$_[0];
- controlleripccall(\&runner_shutdown, @_);
+ my $err = controlleripccall(\&runner_shutdown, @_);
# These have no more use
close($controllerw{$runnerid});
undef $controllerw{$runnerid};
close($controllerr{$runnerid});
undef $controllerr{$runnerid};
+ return $err;
}
# Async call of runner_stopservers
# Called by controller
sub runnerac_stopservers {
- controlleripccall(\&runner_stopservers, @_);
+ return controlleripccall(\&runner_stopservers, @_);
}
# Async call of runner_test_preprocess
# Called by controller
sub runnerac_test_preprocess {
- controlleripccall(\&runner_test_preprocess, @_);
+ return controlleripccall(\&runner_test_preprocess, @_);
}
# Async call of runner_test_run
# Called by controller
sub runnerac_test_run {
- controlleripccall(\&runner_test_run, @_);
+ return controlleripccall(\&runner_test_run, @_);
}
###################################################################
# Call an arbitrary function via IPC
# The first argument is the function reference, the second is the runner ID
+# Returns 0 on success, -1 on error writing to runner
# Called by controller (indirectly, via a more specific function)
sub controlleripccall {
my $funcref = shift @_;
my $margs = freeze \@_;
# Send IPC call via pipe
- syswrite($controllerw{$runnerid}, (pack "L", length($margs)) . $margs);
+ my $err;
+ while(! defined ($err = syswrite($controllerw{$runnerid}, (pack "L", length($margs)) . $margs)) || $err <= 0) {
+ if((!defined $err && ! $!{EINTR}) || (defined $err && $err == 0)) {
+ # Runner has likely died
+ return -1;
+ }
+ # system call was interrupted, probably by ^C; restart it so we stay in sync
+ }
if(!$multiprocess) {
# Call the remote function here in single process mode
ipcrecv();
}
+ return 0;
}
###################################################################
# Receive async response of a previous call via IPC
-# The first return value is the runner ID
+# The first return value is the runner ID or undef on error
# Called by controller
sub runnerar {
my ($runnerid) = @_;
my $err;
my $datalen;
while(! defined ($err = sysread($controllerr{$runnerid}, $datalen, 4)) || $err <= 0) {
- $!{EINTR} || die "error in runnerar: $!\n";
+ if((!defined $err && ! $!{EINTR}) || (defined $err && $err == 0)) {
+ # Runner is likely dead and closed the pipe
+ return undef;
+ }
# system call was interrupted, probably by ^C; restart it so we stay in sync
}
my $len=unpack("L", $datalen);
my $buf;
while(! defined ($err = sysread($controllerr{$runnerid}, $buf, $len)) || $err <= 0) {
- $!{EINTR} || die "error in runnerar: $!\n";
+ if((!defined $err && ! $!{EINTR}) || (defined $err && $err == 0)) {
+ # Runner is likely dead and closed the pipe
+ return undef;
+ }
# system call was interrupted, probably by ^C; restart it so we stay in sync
}
}
###################################################################
-# Returns runner ID if a response from an async call is ready
+# Returns a list of (ready runner ID, errored runner ID); at most one is set.
+# NOTE: on Linux a pipe error (e.g. closed by a dead runner) is reported as
+# ready rather than as an error, so a "ready" result alone can't be trusted.
# argument is 0 for nonblocking, undef for blocking, anything else for timeout
# Called by controller
sub runnerar_ready {
$maxfileno || die "Internal error: no runners are available to wait on\n";
# Wait for any pipe from any runner to be ready
+ # select() may be interrupted by a signal (EINTR); that is deliberately not
+ # treated as an error here: nothing is reported as ready and the caller is
+ # expected to call this function again later.
# TODO: this is relatively slow with hundreds of fds
- # TODO: handle errors
- if(select(my $rout=$rin, undef, undef, $blocking)) {
+ my $ein = $rin;
+ if(select(my $rout=$rin, undef, my $eout=$ein, $blocking) >= 1) {
for my $fd (0..$maxfileno) {
+ # Return an error condition first in case it's both
+ if(vec($eout, $fd, 1)) {
+ return (undef, $idbyfileno{$fd});
+ }
if(vec($rout, $fd, 1)) {
- return $idbyfileno{$fd};
+ return ($idbyfileno{$fd}, undef);
}
}
die "Internal pipe readiness inconsistency\n";
}
- return undef;
+ return (undef, undef);
}
+
###################################################################
# Cleanly abort and exit the runner
# This uses print since there is no longer any controller to write logs.
}
$SIG{USR1} = \&catch_usr1;
+$SIG{PIPE} = 'IGNORE'; # these errors are captured in the read/write calls
##########################################################################
# Clear all possible '*_proxy' environment variables for various protocols
if(!$filename) {
logmsg "ERROR: section verify=>file$partsuffix ".
"has no name attribute\n";
- runnerac_stopservers($runnerid);
- # TODO: this is a blocking call that will stall the controller,
- # but this error condition should never happen except during
- # development.
- my ($rid, $unexpected, $logs) = runnerar($runnerid);
- logmsg $logs;
+ if (runnerac_stopservers($runnerid)) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ } else {
+
+ # TODO: this is a blocking call that will stall the controller,
+ # but this error condition should never happen except during
+ # development.
+ my ($rid, $unexpected, $logs) = runnerar($runnerid);
+ if(!$rid) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ } else {
+ logmsg $logs;
+ }
+ }
# timestamp test result verification end
$timevrfyend{$testnum} = Time::HiRes::time();
return -1;
&& $clearlocks) {
# On Windows, lock files can't be deleted when the process still
# has them open, so kill those processes first
- runnerac_clearlocks($runnerid, "$logdir/$LOCKDIR");
+ if(runnerac_clearlocks($runnerid, "$logdir/$LOCKDIR")) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
$singletest_state{$runnerid} = ST_CLEARLOCKS;
} else {
$singletest_state{$runnerid} = ST_INITED;
} elsif($singletest_state{$runnerid} == ST_CLEARLOCKS) {
my ($rid, $logs) = runnerar($runnerid);
+ if(!$rid) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
logmsg $logs;
my $logdir = getrunnerlogdir($runnerid);
cleardir($logdir);
# Register the test case with the CI environment
citest_starttest($testnum);
- runnerac_test_preprocess($runnerid, $testnum);
+ if(runnerac_test_preprocess($runnerid, $testnum)) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
$singletest_state{$runnerid} = ST_PREPROCESS;
} elsif($singletest_state{$runnerid} == ST_PREPROCESS) {
my ($rid, $why, $error, $logs, $testtimings) = runnerar($runnerid);
+ if(!$rid) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
logmsg $logs;
updatetesttimings($testnum, %$testtimings);
if($error == -2) {
my $CURLOUT;
my $tool;
my $usedvalgrind;
- runnerac_test_run($runnerid, $testnum);
+ if(runnerac_test_run($runnerid, $testnum)) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
$singletest_state{$runnerid} = ST_RUN;
} elsif($singletest_state{$runnerid} == ST_RUN) {
my ($rid, $error, $logs, $testtimings, $cmdres, $CURLOUT, $tool, $usedvalgrind) = runnerar($runnerid);
+ if(!$rid) {
+ logmsg "ERROR: runner $runnerid seems to have died\n";
+ $singletest_state{$runnerid} = ST_INIT;
+ return (-1, 0);
+ }
logmsg $logs;
updatetesttimings($testnum, %$testtimings);
if($error == -1) {
# this runner is busy running a test
$runnersrunning{$runnerid} = $testnum;
} else {
- # We make this assumption to avoid having to handle $error here
- die "Internal error: test must not complete on first call";
+ runnerready($runnerid);
+ if($error >= 0) {
+ # We make this simplifying assumption to avoid having to handle
+ # $error properly here, but we must still handle the case of runner
+ # death without aborting here.
+ die "Internal error: test must not complete on first call";
+ }
}
}
# one immediately. If all runners are busy, wait a fraction of a second
# for one to finish so we can still loop around to check the abort flag.
my $runnerwait = scalar(@runnersidle) && scalar(@runtests) ? 0 : 0.5;
- my $ridready = runnerar_ready($runnerwait);
+ my ($ridready, $riderror) = runnerar_ready($runnerwait);
+ if($ridready && ! defined $runnersrunning{$ridready}) {
+ # On Linux, a closed pipe still shows up as ready instead of error.
+ # Detect this here by checking whether this runner has a test in
+ # progress; if it doesn't, it has no reason to be ready, so treat it
+ # as an error.
+ logmsg "ERROR: Runner $ridready is unexpectedly ready; is probably actually dead\n";
+ $riderror = $ridready;
+ undef $ridready;
+ }
if($ridready) {
# This runner is ready to be serviced
my $testnum = $runnersrunning{$ridready};
} else {
# Test is complete
runnerready($ridready);
-print "COMPLETED $testnum \n" if($verbose); #. join(",", keys(%runnersrunning)) . "\n";
if($error < 0) {
# not a test we can run
}
}
}
+ if($riderror) {
+ logmsg "ERROR: runner $riderror is dead! aborting test run\n";
+ delete $runnersrunning{$riderror} if(defined $runnersrunning{$riderror});
+ $globalabort = 1;
+ }
}
my $sofar = time() - $start;