If a VM is shared between multiple github runners, there is a chance of
them stomping over each other's runs if executed in parallel. To avoid
a race between the runners, introduce a lock file that gets acquired
(created) when ftest.sh starts and gets removed by ftest-nocontainer.sh;
this ensures that both test cases are executed before another runner,
waiting for its chance to run, starts. A runner would wait for
5 minutes before re-trying to run. At most, a runner would wait for
50 minutes (10 retries) before giving up.
Signed-off-by: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
(cherry picked from commit
ec78fee3b4970f24024be21fd20e3031dcbc71ab)
- name: Display test logs
if: ${{ failure() }}
run: |
- cat libcgroup-*/_build/sub/tests/ftests/ftests.sh.log
- cat libcgroup-*/_build/sub/tests/ftests/ftests-nocontainer.sh.log
+ cat libcgroup-*/_build/sub/tests/ftests/ftests-wrapper.sh.log
functionaltestsv1:
name: Cgroup v1 Functional Tests
- name: Display test logs
if: ${{ always() }}
run: |
- cat tests/ftests/ftests.sh.log
- cat tests/ftests/ftests-nocontainer.sh.log
+ cat tests/ftests/ftests-wrapper.sh.log
- name: Archive test logs
if: ${{ always() }}
uses: actions/upload-artifact@v3
- name: Display test logs
if: ${{ always() }}
run: |
- cat tests/ftests/ftests.sh.log
- cat tests/ftests/ftests-nocontainer.sh.log
+ cat tests/ftests/ftests-wrapper.sh.log
- name: Archive test logs
if: ${{ always() }}
uses: actions/upload-artifact@v3
- name: Display test logs
if: ${{ always() }}
run: |
- cat tests/ftests/ftests.sh.log
- cat tests/ftests/ftests-nocontainer.sh.log
+ cat tests/ftests/ftests-wrapper.sh.log
- name: Archive test logs
if: ${{ always() }}
uses: actions/upload-artifact@v3
- name: Display test logs
if: ${{ always() }}
run: |
- cat tests/ftests/ftests.sh.log
- cat tests/ftests/ftests-nocontainer.sh.log
+ cat tests/ftests/ftests-wrapper.sh.log
- name: Archive test logs
if: ${{ always() }}
uses: actions/upload-artifact@v3
# Author: Tom Hromatka <tom.hromatka@oracle.com>
#
-TESTS = ftests.sh ftests-nocontainer.sh
+TESTS = ftests-wrapper.sh
EXTRA_DIST_PYTHON_UTILS = \
cgroup.py \
# Intentionally omit the stress test from the extra dist
# 999-stress-cgroup_init.py
-EXTRA_DIST = README.md ftests.sh ftests-nocontainer.sh \
+EXTRA_DIST = README.md ftests-wrapper.sh \
${EXTRA_DIST_PYTHON_UTILS} ${EXTRA_DIST_PYTHON_TESTS}
clean-local: clean-local-check
+++ /dev/null
-#!/bin/bash
-# SPDX-License-Identifier: LGPL-2.1-only
-
-AUTOMAKE_SKIPPED=77
-AUTOMAKE_HARD_ERROR=99
-
-START_DIR=$PWD
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-
-if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
- cp "$SCRIPT_DIR"/*.py "$START_DIR"
-fi
-
-if [ -d ../../src/python/build/lib.* ]; then
- pushd ../../src/python/build/lib.*
- export PYTHONPATH="$PYTHONPATH:$(pwd)"
- popd
-fi
-
-./ftests.py -l 10 -L "$START_DIR/ftests-nocontainer.py.log" --no-container \
- -n Libcg"$RANDOM"
-RET1=$?
-
-pushd ../../src || exit $AUTOMAKE_HARD_ERROR
-PATH="$PATH:$(pwd)"
-export PATH
-popd || exit $AUTOMAKE_HARD_ERROR
-
-sudo PATH=$PATH PYTHONPATH=$PYTHONPATH ./ftests.py -l 10 -s "sudo" \
- -L "$START_DIR/ftests-nocontainer.py.sudo.log" --no-container -n Libcg"$RANDOM"
-RET2=$?
-
-if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
- rm -f "$START_DIR"/*.py
- rm -fr "$START_DIR"/__pycache__
- rm -f ftests-nocontainer.py.log
- rm -f ftests-nocontainer.py.sudo.log
-fi
-
-
-if [[ $RET1 -ne $AUTOMAKE_SKIPPED ]] && [[ $RET1 -ne 0 ]]; then
- # always return errors from the first test run
- exit $RET1
-fi
-if [[ $RET2 -ne $AUTOMAKE_SKIPPED ]] && [[ $RET2 -ne 0 ]]; then
- # return errors from the second test run
- exit $RET2
-fi
-
-if [[ $RET1 -eq 0 ]] || [[ $RET2 -eq 0 ]]; then
- exit 0
-fi
-
-if [[ $RET1 -eq $AUTOMAKE_SKIPPED ]] || [[ $RET2 -eq $AUTOMAKE_SKIPPED ]]; then
- exit $AUTOMAKE_SKIPPED
-fi
-
-# I don't think we should ever get here, but better safe than sorry
-exit $AUTOMAKE_HARD_ERROR
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: LGPL-2.1-only
+
+# the lock file is removed after all the tests complete
+function cleanup()
+{
+ sudo rm -f "$RUNNER_LOCK_FILE"
+ exit "$1"
+}
+
+AUTOMAKE_SKIPPED=77
+AUTOMAKE_HARD_ERROR=99
+
+# synchronize between different github runners running on
+# the same VM; this will stop runners from stomping over
+# each other's runs.
+LIBCGROUP_RUN_DIR="/var/run/libcgroup/"
+RUNNER_LOCK_FILE="/var/run/libcgroup/github-runner.lock"
+RUNNER_SLEEP_SECS=300 # sleep for 5 minutes
+RUNNER_MAX_TRIES=10   # Abort after 50 minutes, if we don't get a chance to run
+
+START_DIR=$PWD
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
+ cp "$SCRIPT_DIR"/*.py "$START_DIR"
+fi
+
+PYTHON_LIBRARY_PATH=(../../src/python/build/lib*)
+if [ -d "${PYTHON_LIBRARY_PATH[0]}" ]; then
+ pushd "${PYTHON_LIBRARY_PATH[0]}" || cleanup $AUTOMAKE_HARD_ERROR
+ PYTHONPATH="$PYTHONPATH:$(pwd)"
+ export PYTHONPATH
+ popd || cleanup $AUTOMAKE_HARD_ERROR
+fi
+
+# If another runner is running, the lock file exists;
+# wait for 5 minutes between checks
+time_waited=0
+pretty_time=0
+while [ -f "$RUNNER_LOCK_FILE" ]; do
+ if [ "$RUNNER_MAX_TRIES" -le 0 ]; then
+ echo "Unable to get lock to run the ftests, aborting"
+ exit 1
+ fi
+
+ RUNNER_MAX_TRIES=$(( RUNNER_MAX_TRIES - 1 ))
+ sleep "$RUNNER_SLEEP_SECS"
+
+ time_waited=$(( time_waited + RUNNER_SLEEP_SECS ))
+ pretty_time=$(echo $time_waited | awk '{printf "%d:%02d:%02d", $1/3600, ($1/60)%60, $1%60}')
+ echo "[$pretty_time] Waiting on other runners to complete, $RUNNER_MAX_TRIES retries left"
+done
+
+# take the lock and start executing
+sudo mkdir -p "$LIBCGROUP_RUN_DIR"
+sudo touch "$RUNNER_LOCK_FILE"
+
+./ftests.py -l 10 -L "$START_DIR/ftests.py.log" -n Libcg"$RANDOM"
+RET1=$?
+
+./ftests.py -l 10 -L "$START_DIR/ftests-nocontainer.py.log" --no-container \
+ -n Libcg"$RANDOM"
+RET2=$?
+
+pushd ../../src || cleanup $AUTOMAKE_HARD_ERROR
+PATH="$PATH:$(pwd)"
+export PATH
+popd || cleanup $AUTOMAKE_HARD_ERROR
+
+sudo PATH="$PATH" PYTHONPATH="$PYTHONPATH" ./ftests.py -l 10 -s "sudo" \
+ -L "$START_DIR/ftests-nocontainer.py.sudo.log" --no-container -n Libcg"$RANDOM"
+RET3=$?
+
+if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
+ rm -f "$START_DIR"/*.py
+ rm -fr "$START_DIR"/__pycache__
+ rm -f ftests.py.log
+ rm -f ftests-nocontainer.py.log
+ rm -f ftests-nocontainer.py.sudo.log
+fi
+
+if [[ $RET1 -ne $AUTOMAKE_SKIPPED ]] && [[ $RET1 -ne 0 ]]; then
+ # always return errors from the first test run
+ cleanup $RET1
+fi
+if [[ $RET2 -ne $AUTOMAKE_SKIPPED ]] && [[ $RET2 -ne 0 ]]; then
+ # return errors from the second test run
+ cleanup $RET2
+fi
+if [[ $RET3 -ne $AUTOMAKE_SKIPPED ]] && [[ $RET3 -ne 0 ]]; then
+ # return errors from the third test run
+ cleanup $RET3
+fi
+
+if [[ $RET1 -eq 0 ]] || [[ $RET2 -eq 0 ]] || [[ $RET3 -eq 0 ]]; then
+ cleanup 0
+fi
+
+if [[ $RET1 -eq $AUTOMAKE_SKIPPED ]] || [[ $RET2 -eq $AUTOMAKE_SKIPPED ]] ||
+ [[ $RET3 -eq $AUTOMAKE_SKIPPED ]]; then
+ cleanup $AUTOMAKE_SKIPPED
+fi
+
+# I don't think we should ever get here, but better safe than sorry
+cleanup $AUTOMAKE_HARD_ERROR
+++ /dev/null
-#!/bin/bash
-# SPDX-License-Identifier: LGPL-2.1-only
-
-START_DIR=$PWD
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-
-if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
- cp "$SCRIPT_DIR"/*.py "$START_DIR"
-fi
-
-if [ -d ../../src/python/build/lib.* ]; then
- pushd ../../src/python/build/lib.*
- export PYTHONPATH="$PYTHONPATH:$(pwd)"
- popd
-fi
-
-./ftests.py -l 10 -L "$START_DIR/ftests.py.log" -n Libcg"$RANDOM"
-RET=$?
-
-if [ "$START_DIR" != "$SCRIPT_DIR" ]; then
- rm -f "$START_DIR"/*.py
- rm -fr "$START_DIR"/__pycache__
- rm -f ftests.py.log
-fi
-
-exit $RET