gh-109276: libregrtest: use separated file for JSON (#109277)

author Victor Stinner <vstinner@python.org>

Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)

committer GitHub <noreply@github.com>

Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)
author Victor Stinner <vstinner@python.org>
Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)
committer GitHub <noreply@github.com>
Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)
diff --git a/Lib/test/libregrtest/result.py b/Lib/test/libregrtest/result.py

index cfd08ecc41b7d453f312dcfa2e8a47d4065a6ee2..bf885264657d5c89602de669a520aa2922b2c148 100644 (file)
--- a/Lib/test/libregrtest/result.py
+++ b/Lib/test/libregrtest/result.py
@@ -156,7 +156,7 @@ class TestResult:
              return None
          return tuple(match_tests)
  
-    def write_json(self, file) -> None:
+    def write_json_into(self, file) -> None:
          json.dump(self, file, cls=_EncodeTestResult)
  
      @staticmethod
diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py

index 768625cb507f9af3fe38e35ff0d412f983d4fd9b..5c665abfeb57bd523dbc860be2d824179d49011d 100644 (file)
--- a/Lib/test/libregrtest/run_workers.py
+++ b/Lib/test/libregrtest/run_workers.py
@@ -20,12 +20,14 @@ from .results import TestResults
  from .runtests import RunTests
  from .single import PROGRESS_MIN_TIME
  from .utils import (
-    StrPath, TestName,
+    StrPath, StrJSON, TestName, MS_WINDOWS,
      format_duration, print_warning)
  from .worker import create_worker_process, USE_PROCESS_GROUP
  
-if sys.platform == 'win32':
+if MS_WINDOWS:
      import locale
+    import msvcrt
+
  
  
  # Display the running tests if nothing happened last N seconds
@@ -153,10 +155,11 @@ class WorkerThread(threading.Thread):
      ) -> MultiprocessResult:
          return MultiprocessResult(test_result, stdout, err_msg)
  
-    def _run_process(self, runtests: RunTests, output_file: TextIO,
+    def _run_process(self, runtests: RunTests, output_fd: int, json_fd: int,
                       tmp_dir: StrPath | None = None) -> int:
          try:
-            popen = create_worker_process(runtests, output_file, tmp_dir)
+            popen = create_worker_process(runtests, output_fd, json_fd,
+                                          tmp_dir)
  
              self._killed = False
              self._popen = popen
@@ -208,7 +211,7 @@ class WorkerThread(threading.Thread):
      def _runtest(self, test_name: TestName) -> MultiprocessResult:
          self.current_test_name = test_name
  
-        if sys.platform == 'win32':
+        if MS_WINDOWS:
              # gh-95027: When stdout is not a TTY, Python uses the ANSI code
              # page for the sys.stdout encoding. If the main process runs in a
              # terminal, sys.stdout uses WindowsConsoleIO with UTF-8 encoding.
@@ -221,14 +224,25 @@ class WorkerThread(threading.Thread):
              match_tests = self.runtests.get_match_tests(test_name)
          else:
              match_tests = None
-        kwargs = {}
-        if match_tests:
-            kwargs['match_tests'] = match_tests
-        worker_runtests = self.runtests.copy(tests=tests, **kwargs)
+        err_msg = None
  
          # gh-94026: Write stdout+stderr to a tempfile as workaround for
          # non-blocking pipes on Emscripten with NodeJS.
-        with tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file:
+        with (tempfile.TemporaryFile('w+', encoding=encoding) as stdout_file,
+              tempfile.TemporaryFile('w+', encoding='utf8') as json_file):
+            stdout_fd = stdout_file.fileno()
+            json_fd = json_file.fileno()
+            if MS_WINDOWS:
+                json_fd = msvcrt.get_osfhandle(json_fd)
+
+            kwargs = {}
+            if match_tests:
+                kwargs['match_tests'] = match_tests
+            worker_runtests = self.runtests.copy(
+                tests=tests,
+                json_fd=json_fd,
+                **kwargs)
+
              # gh-93353: Check for leaked temporary files in the parent process,
              # since the deletion of temporary files can happen late during
              # Python finalization: too late for libregrtest.
@@ -239,12 +253,14 @@ class WorkerThread(threading.Thread):
                  tmp_dir = tempfile.mkdtemp(prefix="test_python_")
                  tmp_dir = os.path.abspath(tmp_dir)
                  try:
-                    retcode = self._run_process(worker_runtests, stdout_file, tmp_dir)
+                    retcode = self._run_process(worker_runtests,
+                                                stdout_fd, json_fd, tmp_dir)
                  finally:
                      tmp_files = os.listdir(tmp_dir)
                      os_helper.rmtree(tmp_dir)
              else:
-                retcode = self._run_process(worker_runtests, stdout_file)
+                retcode = self._run_process(worker_runtests,
+                                            stdout_fd, json_fd)
                  tmp_files = ()
              stdout_file.seek(0)
  
@@ -257,23 +273,27 @@ class WorkerThread(threading.Thread):
                  result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
                  return self.mp_result_error(result, err_msg=err_msg)
  
+            try:
+                # deserialize run_tests_worker() output
+                json_file.seek(0)
+                worker_json: StrJSON = json_file.read()
+                if worker_json:
+                    result = TestResult.from_json(worker_json)
+                else:
+                    err_msg = f"empty JSON"
+            except Exception as exc:
+                # gh-101634: Catch UnicodeDecodeError if stdout cannot be
+                # decoded from encoding
+                err_msg = f"Fail to read or parser worker process JSON: {exc}"
+                result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
+                return self.mp_result_error(result, stdout, err_msg=err_msg)
+
          if retcode is None:
              result = TestResult(test_name, state=State.TIMEOUT)
              return self.mp_result_error(result, stdout)
  
-        err_msg = None
          if retcode != 0:
              err_msg = "Exit code %s" % retcode
-        else:
-            stdout, _, worker_json = stdout.rpartition("\n")
-            stdout = stdout.rstrip()
-            if not worker_json:
-                err_msg = "Failed to parse worker stdout"
-            else:
-                try:
-                    result = TestResult.from_json(worker_json)
-                except Exception as exc:
-                    err_msg = "Failed to parse worker JSON: %s" % exc
  
          if err_msg:
              result = TestResult(test_name, state=State.MULTIPROCESSING_ERROR)
diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py

index e843cc2dadf7346c842336fcdcbaf3df405dc3d6..64f8f6ab0ff30578135719fb9a2e5dc4c6a1b44d 100644 (file)
--- a/Lib/test/libregrtest/runtests.py
+++ b/Lib/test/libregrtest/runtests.py
@@ -36,6 +36,9 @@ class RunTests:
      gc_threshold: int | None = None
      use_resources: list[str] = dataclasses.field(default_factory=list)
      python_cmd: list[str] | None = None
+    # On Unix, it's a file descriptor.
+    # On Windows, it's a handle.
+    json_fd: int | None = None
  
      def copy(self, **override):
          state = dataclasses.asdict(self)
diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py

index 635b4f93702b04ccd9780825ebba86bed31c3899..c26e542cf15ef510a8ac29551c07d35e610675bd 100644 (file)
--- a/Lib/test/libregrtest/single.py
+++ b/Lib/test/libregrtest/single.py
@@ -271,5 +271,9 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult:
              print(f"test {test_name} crashed -- {msg}",
                    file=sys.stderr, flush=True)
          result.state = State.UNCAUGHT_EXC
+
+    sys.stdout.flush()
+    sys.stderr.flush()
+
      result.duration = time.perf_counter() - start_time
      return result
diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py

index ed1286e5700679c1c483b05808fc493cf1458c57..b3b204f65f92ec6052e1a1367a039845b54fdbee 100644 (file)
--- a/Lib/test/libregrtest/worker.py
+++ b/Lib/test/libregrtest/worker.py
@@ -10,7 +10,7 @@ from .setup import setup_process, setup_test_dir
  from .runtests import RunTests
  from .single import run_single_test
  from .utils import (
-    StrPath, StrJSON, FilterTuple,
+    StrPath, StrJSON, FilterTuple, MS_WINDOWS,
      get_work_dir, exit_timeout)
  
  
@@ -18,7 +18,7 @@ USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg"))
  
  
  def create_worker_process(runtests: RunTests,
-                          output_file: TextIO,
+                          output_fd: int, json_fd: int,
                            tmp_dir: StrPath | None = None) -> subprocess.Popen:
      python_cmd = runtests.python_cmd
      worker_json = runtests.as_json()
@@ -41,23 +41,42 @@ def create_worker_process(runtests: RunTests,
      # Running the child from the same working directory as regrtest's original
      # invocation ensures that TEMPDIR for the child is the same when
      # sysconfig.is_python_build() is true. See issue 15300.
-    kw = dict(
+    kwargs = dict(
          env=env,
-        stdout=output_file,
+        stdout=output_fd,
          # bpo-45410: Write stderr into stdout to keep messages order
-        stderr=output_file,
+        stderr=output_fd,
          text=True,
-        close_fds=(os.name != 'nt'),
+        close_fds=True,
      )
+    if not MS_WINDOWS:
+        kwargs['pass_fds'] = [json_fd]
+    else:
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.lpAttributeList = {"handle_list": [json_fd]}
+        kwargs['startupinfo'] = startupinfo
      if USE_PROCESS_GROUP:
-        kw['start_new_session'] = True
-    return subprocess.Popen(cmd, **kw)
+        kwargs['start_new_session'] = True
+
+    if MS_WINDOWS:
+        os.set_handle_inheritable(json_fd, True)
+    try:
+        return subprocess.Popen(cmd, **kwargs)
+    finally:
+        if MS_WINDOWS:
+            os.set_handle_inheritable(json_fd, False)
  
  
  def worker_process(worker_json: StrJSON) -> NoReturn:
      runtests = RunTests.from_json(worker_json)
      test_name = runtests.tests[0]
      match_tests: FilterTuple | None = runtests.match_tests
+    json_fd: int = runtests.json_fd
+
+    if MS_WINDOWS:
+        import msvcrt
+        json_fd = msvcrt.open_osfhandle(json_fd, os.O_WRONLY)
+
  
      setup_test_dir(runtests.test_dir)
      setup_process()
@@ -70,11 +89,10 @@ def worker_process(worker_json: StrJSON) -> NoReturn:
              print(f"Re-running {test_name} in verbose mode", flush=True)
  
      result = run_single_test(test_name, runtests)
-    print()   # Force a newline (just in case)
  
-    # Serialize TestResult as dict in JSON
-    result.write_json(sys.stdout)
-    sys.stdout.flush()
+    with open(json_fd, 'w', encoding='utf-8') as json_file:
+        result.write_json_into(json_file)
+
      sys.exit(0)
  
  
diff --git a/Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst b/Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst

new file mode 100644 (file)

index 0000000..5fcf662
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst
@@ -0,0 +1,3 @@
+libregrtest now uses a separated file descriptor to write test result as JSON.
+Previously, if a test wrote debug messages late around the JSON, the main test
+process failed to parse JSON. Patch by Victor Stinner.
author	Victor Stinner <vstinner@python.org>
	Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)
committer	GitHub <noreply@github.com>
	Mon, 11 Sep 2023 17:33:42 +0000 (19:33 +0200)
Lib/test/libregrtest/result.py		patch \| blob \| blame \| history
Lib/test/libregrtest/run_workers.py		patch \| blob \| blame \| history
Lib/test/libregrtest/runtests.py		patch \| blob \| blame \| history
Lib/test/libregrtest/single.py		patch \| blob \| blame \| history
Lib/test/libregrtest/worker.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Tests/2023-09-11-18-19-52.gh-issue-109276.btfFtT.rst	[new file with mode: 0644]	patch \| blob