import shutil
import tempfile
+import time
+import warnings
from collections import namedtuple
from contextlib import contextmanager
from os import PathLike
from pathlib import Path
+from typing import Any
+from typing import Callable
from typing import Iterator
+from typing import List
from typing import Tuple
from typing import Union
from unittest import mock
+import httpx
+import pytest
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
remove_dirs(dirs)
def util_call_with_backoff(
    method_or_callable: Callable,
    args: Union[List, Tuple],
    *,
    skip_on_503=True,
    max_retry_count: int = 3,
    retry_time: float = 20.0,
) -> Tuple[bool, Any]:
    """
    Call ``method_or_callable(*args)``, retrying with exponential backoff
    whenever it raises an ``httpx.HTTPError``.

    For whatever reason, the images started during the test pipeline like to
    segfault sometimes, crash and otherwise fail randomly, when run with the
    exact files that usually pass.  So the call is attempted up to
    ``max_retry_count`` times, with the wait doubling between attempts, in
    hopes the issue resolves itself during one of them.

    With the default settings this waits:
      - 20s after the 1st failed attempt
      - 40s after the 2nd failed attempt
      - not at all after the 3rd (final) failed attempt, as no retry follows

    Args:
        method_or_callable: The function or bound method to invoke.
        args: Positional arguments, unpacked into the call.
        skip_on_503: When True, and every HTTP status error seen was a 503
            (Service Unavailable), the running test is skipped through
            ``pytest.skip`` instead of returning a failure.
        max_retry_count: Maximum number of attempts to make.
        retry_time: Seconds to wait after the first failure; doubled after
            each subsequent failure.

    Returns:
        A ``(succeeded, result)`` tuple.  ``result`` is the return value of
        the call when ``succeeded`` is True and ``None`` otherwise.  Failure
        is reported through the flag, not raised, so callers must check it.

    Exceptions other than ``httpx.HTTPError`` are not retried and propagate
    to the caller unchanged.
    """
    result = None
    succeeded = False
    status_codes = []

    for attempt in range(1, max_retry_count + 1):
        try:
            result = method_or_callable(*args)
        except httpx.HTTPError as exc:
            # httpx raises RuntimeError from ``.request`` when no request was
            # attached to the error; don't let the log line abort the retries
            try:
                url = exc.request.url
            except RuntimeError:
                url = "<unknown url>"
            warnings.warn(f"HTTP Exception for {url} - {exc}")

            # Only status errors carry a response; connection level failures
            # are retried but contribute nothing to the 503 check below
            if isinstance(exc, httpx.HTTPStatusError):
                status_codes.append(exc.response.status_code)

            # Waiting only makes sense when another attempt will follow
            if attempt < max_retry_count:
                time.sleep(retry_time)
                retry_time = retry_time * 2.0
        else:
            succeeded = True
            break

    if (
        not succeeded
        and skip_on_503
        and status_codes
        and all(code == httpx.codes.SERVICE_UNAVAILABLE for code in status_codes)
    ):
        # The service never came up at all, which says nothing about the code
        # under test, so skip rather than report a failure
        pytest.skip("Repeated HTTP 503 for service")

    return succeeded, result
+
+
class DirectoriesMixin:
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
import os
-import time
from unittest import mock
import httpx
from PIL import Image
from documents.tests.utils import FileSystemAssertsMixin
+from documents.tests.utils import util_call_with_backoff
from paperless_mail.tests.test_parsers import BaseMailParserTestCase
def imagehash(file, hash_size=18):
    """
    Return the average hash of the image stored at ``file`` as a string.

    ``hash_size`` is passed straight through to ``average_hash``
    (presumably the function from the ``imagehash`` package - its import is
    not visible here, confirm).
    """
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the hash is computed, instead of whenever the
    # garbage collector gets to it
    with Image.open(file) as image:
        return f"{average_hash(image, hash_size)}"
- def util_call_with_backoff(self, method_or_callable, args):
- """
- For whatever reason, the image started during the test pipeline likes to
- segfault sometimes, when run with the exact files that usually pass.
-
- So, this function will retry the parsing up to 3 times, with larger backoff
- periods between each attempt, in hopes the issue resolves itself during
- one attempt to parse.
-
- This will wait the following:
- - Attempt 1 - 20s following failure
- - Attempt 2 - 40s following failure
- - Attempt 3 - 80s following failure
-
- """
- result = None
- succeeded = False
- retry_time = 20.0
- retry_count = 0
- max_retry_count = 3
-
- while retry_count < max_retry_count and not succeeded:
- try:
- result = method_or_callable(*args)
-
- succeeded = True
- except httpx.HTTPError as e:
- # Retry on HTTP errors
- print(f"{e} during try #{retry_count}", flush=True)
-
- retry_count = retry_count + 1
-
- time.sleep(retry_time)
- retry_time = retry_time * 2.0
- except Exception:
- # Not on other error
- raise
-
- self.assertTrue(
- succeeded,
- "Continued Tika server errors after multiple retries",
- )
-
- return result
-
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
"""
self.SAMPLE_DIR / "html.eml",
)
- pdf_path = self.util_call_with_backoff(
+ _, pdf_path = util_call_with_backoff(
self.parser.generate_pdf,
[msg],
)
- gotenberg is called and the resulting file is returned and look as expected.
"""
- self.util_call_with_backoff(
+ util_call_with_backoff(
self.parser.parse,
[self.SAMPLE_DIR / "html.eml", "message/rfc822"],
)
import os
-import time
from pathlib import Path
from typing import Final
import pytest
from django.test import TestCase
+from documents.tests.utils import util_call_with_backoff
from paperless_tika.parsers import TikaDocumentParser
def tearDown(self) -> None:
self.parser.cleanup()
- def try_parse_with_wait(self, test_file: Path, mime_type: str):
- """
- For whatever reason, the image started during the test pipeline likes to
- segfault sometimes, when run with the exact files that usually pass.
-
- So, this function will retry the parsing up to 3 times, with larger backoff
- periods between each attempt, in hopes the issue resolves itself during
- one attempt to parse.
-
- This will wait the following:
- - Attempt 1 - 20s following failure
- - Attempt 2 - 40s following failure
- - Attempt 3 - 80s following failure
-
- """
- succeeded = False
- retry_time = 20.0
- retry_count = 0
- max_retry_count = 3
-
- while retry_count < max_retry_count and not succeeded:
- try:
- self.parser.parse(test_file, mime_type)
-
- succeeded = True
- except Exception as e:
- print(f"{e} during try #{retry_count}", flush=True)
-
- retry_count = retry_count + 1
-
- time.sleep(retry_time)
- retry_time = retry_time * 2.0
-
- self.assertTrue(
- succeeded,
- "Continued Tika server errors after multiple retries",
- )
-
def test_basic_parse_odt(self):
"""
GIVEN:
"""
test_file = self.SAMPLE_DIR / Path("sample.odt")
- self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text")
+ util_call_with_backoff(
+ self.parser.parse,
+ [test_file, "application/vnd.oasis.opendocument.text"],
+ )
self.assertEqual(
self.parser.text,
"""
test_file = self.SAMPLE_DIR / Path("sample.docx")
- self.try_parse_with_wait(
- test_file,
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ util_call_with_backoff(
+ self.parser.parse,
+ [
+ test_file,
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ ],
)
self.assertEqual(
"""
test_file = self.SAMPLE_DIR / "sample.doc"
- self.try_parse_with_wait(
- test_file,
- "application/msword",
+ util_call_with_backoff(
+ self.parser.parse,
+ [test_file, "application/msword"],
)
self.assertIn(