Occasionally LXC fails with a socket timeout error when running
on GitHub Actions. To alleviate this, run `lxc init` in a separate
thread. While the init is running, periodically print an inane log
message in an attempt to appease GitHub Actions. If `lxc init`
fails, Container() will now retry the init up to five times.
Examples of the errors seen:

    Error: Failed instance creation: write unix
    @->/var/snap/lxd/common/lxd/unix.socket: i/o timeout

    Error: Failed instance creation: websocket: close 1006
    (abnormal closure): unexpected EOF

Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
Reviewed-by: Kamalesh Babulal <kamalesh.babulal@oracle.com>
# Author: Tom Hromatka <tom.hromatka@oracle.com>
#
-from run import Run
+from run import Run, RunError
+from queue import Queue
+import threading as tp
+from log import Log
import consts
+import time
import os
return Run.run(cmd2)
- def create(self):
+ def _init_container(self, q):
cmd = list()
if self.privileged:
cmd.append(self.name)
- return Run.run(cmd)
+ try:
+ Run.run(cmd)
+ q.put(True)
+ except Exception: # noqa: E722
+ q.put(False)
+ except BaseException: # noqa: E722
+ q.put(False)
+
+ def create(self):
+ # Github Actions sometimes has timeout issues with the LXC sockets.
+ # Try this command multiple times in an attempt to work around this
+ # limitation
+
+ queue = Queue()
+ sleep_time = 5
+ ret = False
+
+ for i in range(5):
+ thread = tp.Thread(target=self._init_container, args=(queue, ))
+ thread.start()
+
+ time_cnt = 0
+ while thread.is_alive():
+ time.sleep(sleep_time)
+ time_cnt += sleep_time
+ Log.log_debug('Waiting... {}'.format(time_cnt))
+
+ ret = queue.get()
+ if ret:
+ break
+ else:
+ try:
+ self.delete()
+ except RunError:
+ pass
+
+ thread.join()
+
+ if not ret:
+ raise ContainerError('Failed to create the container')
def delete(self):
cmd = list()