git.ipfire.org Git - thirdparty/openembedded/openembedded-core-contrib.git/commitdiff
oeqa/runtime/ssh: add retry logic and sleeps to allow for slower systems
author: Jon Mason <jdmason@kudzu.us>
Mon, 24 Jun 2024 22:20:24 +0000 (18:20 -0400)
committer: Richard Purdie <richard.purdie@linuxfoundation.org>
Tue, 2 Jul 2024 21:29:10 +0000 (22:29 +0100)
On exceptionally slow systems, the ssh test can intermittently fail due
to a race between when ping works and the networking applications being
brought up.  To work around this issue, add some retry logic when ssh
fails to connect.  According to the man page of ssh, "ssh exits
with the exit status of the remote command or with 255 if an error
occurred."  So, only retry if the return code is 255, and limit the
number of retries to prevent it looping forever.

Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
meta/lib/oeqa/runtime/cases/ssh.py

index cdbef595008cbb79f35c199af258968272ce2bd7..ae92bb34cd9c262ff51be3c3d82a8b2b8fdece9f 100644 (file)
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: MIT
 #
 
+import time
+
 from oeqa.runtime.case import OERuntimeTestCase
 from oeqa.core.decorator.depends import OETestDepends
 from oeqa.runtime.decorator.package import OEHasPackage
@@ -13,12 +15,20 @@ class SSHTest(OERuntimeTestCase):
     @OETestDepends(['ping.PingTest.test_ping'])
     @OEHasPackage(['dropbear', 'openssh-sshd'])
     def test_ssh(self):
-        (status, output) = self.target.run('sleep 20', timeout=2)
-        msg='run() timed out but return code was zero.'
-        self.assertNotEqual(status, 0, msg=msg)
-        (status, output) = self.target.run('uname -a')
-        self.assertEqual(status, 0, msg='SSH Test failed: %s' % output)
-        (status, output) = self.target.run('cat /etc/controllerimage')
-        msg = "This isn't the right image  - /etc/controllerimage " \
-              "shouldn't be here %s" % output
-        self.assertEqual(status, 1, msg=msg)
+        for i in range(5):
+          status, output = self.target.run("uname -a", timeout=5)
+          if status == 0:
+              break
+          elif status == 255:
+              # ssh returns 255 only if a ssh error occurs.  This could
+              # be an issue with "Connection refused" because the port
+              # isn't open yet, and this could check explicitly for that
+              # here.  However, let's keep it simple and just retry for
+              # all errors a limited amount of times with a sleep to
+              # give it time for the port to open.
+              time.sleep(5)
+              continue
+          else:
+              self.fail("uname failed with \"%s\"" %output)
+        if status == 255:
+            self.fail("ssh error %s" %output)