git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Combine and extend the utility for calling the live services to be more robust agains...
author: Trenton H <797416+stumpylog@users.noreply.github.com>
Tue, 29 Aug 2023 17:25:20 +0000 (10:25 -0700)
committer: Trenton H <797416+stumpylog@users.noreply.github.com>
Sat, 9 Sep 2023 02:20:08 +0000 (19:20 -0700)
src/documents/tests/utils.py
src/paperless_mail/tests/test_parsers_live.py
src/paperless_tika/tests/test_live_tika.py

index 483d3b12d79bd4d496f874587bb3b78ff12d080c..c679a8f0234d35de780e2f40972671eed1720e4a 100644 (file)
@@ -1,14 +1,21 @@
 import shutil
 import tempfile
+import time
+import warnings
 from collections import namedtuple
 from contextlib import contextmanager
 from os import PathLike
 from pathlib import Path
+from typing import Any
+from typing import Callable
 from typing import Iterator
+from typing import List
 from typing import Tuple
 from typing import Union
 from unittest import mock
 
+import httpx
+import pytest
 from django.apps import apps
 from django.db import connection
 from django.db.migrations.executor import MigrationExecutor
@@ -78,6 +85,61 @@ def paperless_environment():
             remove_dirs(dirs)
 
 
+def util_call_with_backoff(
+    method_or_callable: Callable,
+    args: Union[List, Tuple],
+    *,
+    skip_on_503=True,
+) -> Tuple[bool, Any]:
+    """
+    Call method_or_callable(*args), retrying on httpx.HTTPError, because the
+    service images started during the test pipeline like to segfault, crash
+    or otherwise fail randomly on the exact files that usually pass.
+
+    Up to 3 attempts are made, sleeping between them (never after the last):
+        - after failed attempt 1: 20s
+        - after failed attempt 2: 40s
+
+    Returns (succeeded, result); result is None when every attempt failed.
+    Exceptions other than httpx.HTTPError propagate immediately.  If
+    skip_on_503 is set and every failure was an HTTP 503 response, the
+    calling test is skipped via pytest.skip() instead of returning.
+    """
+    result = None
+    succeeded = False
+    retry_time = 20.0
+    retry_count = 0
+    status_codes = []
+    max_retry_count = 3
+
+    while retry_count < max_retry_count and not succeeded:
+        try:
+            result = method_or_callable(*args)
+
+            succeeded = True
+        except httpx.HTTPError as exc:
+            warnings.warn(f"HTTP Exception for {exc.request.url} - {exc}")
+
+            if isinstance(exc, httpx.HTTPStatusError):
+                status_codes.append(exc.response.status_code)
+
+            retry_count = retry_count + 1
+            # Back off only when another attempt will actually follow
+            if retry_count < max_retry_count:
+                time.sleep(retry_time)
+                retry_time = retry_time * 2.0
+
+    if (
+        not succeeded
+        and status_codes
+        and skip_on_503
+        and all(element == httpx.codes.SERVICE_UNAVAILABLE for element in status_codes)
+    ):
+        pytest.skip("Repeated HTTP 503 for service")
+
+    return succeeded, result
+
+
 class DirectoriesMixin:
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
index 208383b15ee1738c6f06afef57f5c61b8d7e484e..c58c1dfbcf60b077adcae342a979397a5537c5be 100644 (file)
@@ -1,5 +1,4 @@
 import os
-import time
 from unittest import mock
 
 import httpx
@@ -10,6 +9,7 @@ from pdfminer.high_level import extract_text
 from PIL import Image
 
 from documents.tests.utils import FileSystemAssertsMixin
+from documents.tests.utils import util_call_with_backoff
 from paperless_mail.tests.test_parsers import BaseMailParserTestCase
 
 
@@ -79,51 +79,6 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
     def imagehash(file, hash_size=18):
         return f"{average_hash(Image.open(file), hash_size)}"
 
-    def util_call_with_backoff(self, method_or_callable, args):
-        """
-        For whatever reason, the image started during the test pipeline likes to
-        segfault sometimes, when run with the exact files that usually pass.
-
-        So, this function will retry the parsing up to 3 times, with larger backoff
-        periods between each attempt, in hopes the issue resolves itself during
-        one attempt to parse.
-
-        This will wait the following:
-            - Attempt 1 - 20s following failure
-            - Attempt 2 - 40s following failure
-            - Attempt 3 - 80s following failure
-
-        """
-        result = None
-        succeeded = False
-        retry_time = 20.0
-        retry_count = 0
-        max_retry_count = 3
-
-        while retry_count < max_retry_count and not succeeded:
-            try:
-                result = method_or_callable(*args)
-
-                succeeded = True
-            except httpx.HTTPError as e:
-                # Retry on HTTP errors
-                print(f"{e} during try #{retry_count}", flush=True)
-
-                retry_count = retry_count + 1
-
-                time.sleep(retry_time)
-                retry_time = retry_time * 2.0
-            except Exception:
-                # Not on other error
-                raise
-
-        self.assertTrue(
-            succeeded,
-            "Continued Tika server errors after multiple retries",
-        )
-
-        return result
-
     @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
     def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
         """
@@ -187,7 +142,7 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
             self.SAMPLE_DIR / "html.eml",
         )
 
-        pdf_path = self.util_call_with_backoff(
+        _, pdf_path = util_call_with_backoff(
             self.parser.generate_pdf,
             [msg],
         )
@@ -210,7 +165,7 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
             - gotenberg is called and the resulting file is returned and look as expected.
         """
 
-        self.util_call_with_backoff(
+        util_call_with_backoff(
             self.parser.parse,
             [self.SAMPLE_DIR / "html.eml", "message/rfc822"],
         )
index f4c8e01349285ca9ae407574aa1b73ec1325f0ab..f34278467b792608996e3d48746ba24fdb03db31 100644 (file)
@@ -1,11 +1,11 @@
 import os
-import time
 from pathlib import Path
 from typing import Final
 
 import pytest
 from django.test import TestCase
 
+from documents.tests.utils import util_call_with_backoff
 from paperless_tika.parsers import TikaDocumentParser
 
 
@@ -28,44 +28,6 @@ class TestTikaParserAgainstServer(TestCase):
     def tearDown(self) -> None:
         self.parser.cleanup()
 
-    def try_parse_with_wait(self, test_file: Path, mime_type: str):
-        """
-        For whatever reason, the image started during the test pipeline likes to
-        segfault sometimes, when run with the exact files that usually pass.
-
-        So, this function will retry the parsing up to 3 times, with larger backoff
-        periods between each attempt, in hopes the issue resolves itself during
-        one attempt to parse.
-
-        This will wait the following:
-            - Attempt 1 - 20s following failure
-            - Attempt 2 - 40s following failure
-            - Attempt 3 - 80s following failure
-
-        """
-        succeeded = False
-        retry_time = 20.0
-        retry_count = 0
-        max_retry_count = 3
-
-        while retry_count < max_retry_count and not succeeded:
-            try:
-                self.parser.parse(test_file, mime_type)
-
-                succeeded = True
-            except Exception as e:
-                print(f"{e} during try #{retry_count}", flush=True)
-
-                retry_count = retry_count + 1
-
-                time.sleep(retry_time)
-                retry_time = retry_time * 2.0
-
-        self.assertTrue(
-            succeeded,
-            "Continued Tika server errors after multiple retries",
-        )
-
     def test_basic_parse_odt(self):
         """
         GIVEN:
@@ -78,7 +40,10 @@ class TestTikaParserAgainstServer(TestCase):
         """
         test_file = self.SAMPLE_DIR / Path("sample.odt")
 
-        self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text")
+        util_call_with_backoff(
+            self.parser.parse,
+            [test_file, "application/vnd.oasis.opendocument.text"],
+        )
 
         self.assertEqual(
             self.parser.text,
@@ -104,9 +69,12 @@ class TestTikaParserAgainstServer(TestCase):
         """
         test_file = self.SAMPLE_DIR / Path("sample.docx")
 
-        self.try_parse_with_wait(
-            test_file,
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        util_call_with_backoff(
+            self.parser.parse,
+            [
+                test_file,
+                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            ],
         )
 
         self.assertEqual(
@@ -131,9 +99,9 @@ class TestTikaParserAgainstServer(TestCase):
         """
         test_file = self.SAMPLE_DIR / "sample.doc"
 
-        self.try_parse_with_wait(
-            test_file,
-            "application/msword",
+        util_call_with_backoff(
+            self.parser.parse,
+            [test_file, "application/msword"],
         )
 
         self.assertIn(