Feature: Switches to a new client to handle communication with Gotenberg (#4391)

author Trenton H <797416+stumpylog@users.noreply.github.com>

Fri, 20 Oct 2023 00:27:29 +0000 (17:27 -0700)

committer GitHub <noreply@github.com>

Fri, 20 Oct 2023 00:27:29 +0000 (00:27 +0000)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Fri, 20 Oct 2023 00:27:29 +0000 (17:27 -0700)
committer GitHub <noreply@github.com>
Fri, 20 Oct 2023 00:27:29 +0000 (00:27 +0000)
diff --git a/Pipfile b/Pipfile

index af6f4e4fd25143697d0642e5f31cda1e4c26d4d5..d8b66d7190742f44199e4060987187383e59e8e9 100644 (file)
--- a/Pipfile
+++ b/Pipfile
@@ -51,6 +51,7 @@ flower = "*"
  bleach = "*"
  zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
  django-multiselectfield = "*"
+gotenberg-client = "*"
  
  [dev-packages]
  # Linting
diff --git a/Pipfile.lock b/Pipfile.lock

index 8a469ca92d7e9447a9f46855660216991b984566..67cdc29b1c4097804354087324871c8bca8cde72 100644 (file)
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
  {
      "_meta": {
          "hash": {
-            "sha256": "3025da2940433d347b2fd2ac222852c21f4aa73eeefbd1ee9152cbfd7a7a48e9"
+            "sha256": "505bd6b18d31ed64988ef307c12a5acb70f611cafd932a391e985a11bbbc8000"
          },
          "pipfile-spec": 6,
          "requires": {},
@@ -539,6 +539,15 @@
              "markers": "python_version >= '3.7'",
              "version": "==2.0.1"
          },
+        "gotenberg-client": {
+            "hashes": [
+                "sha256:4508ecb913ef2d553dd2ceb78e32cee001000ba08c910ba1f9ace38350d1589e",
+                "sha256:7a3f8a02caee768391373b3610c6ec25a853cccf391ed6b5d5a1292c3ed15e7e"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.8'",
+            "version": "==0.3.0"
+        },
          "gunicorn": {
              "hashes": [
                  "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0",
@@ -556,6 +565,13 @@
              "markers": "python_version >= '3.7'",
              "version": "==0.14.0"
          },
+        "h2": {
+            "hashes": [
+                "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d",
+                "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"
+            ],
+            "version": "==4.1.0"
+        },
          "hiredis": {
              "hashes": [
                  "sha256:071c5814b850574036506a8118034f97c3cbf2fe9947ff45a27b07a48da56240",
@@ -650,6 +666,14 @@
              ],
              "version": "==2.2.3"
          },
+        "hpack": {
+            "hashes": [
+                "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c",
+                "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"
+            ],
+            "markers": "python_full_version >= '3.6.1'",
+            "version": "==4.0.0"
+        },
          "httpcore": {
              "hashes": [
                  "sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9",
@@ -699,6 +723,9 @@
              "version": "==0.6.0"
          },
          "httpx": {
+            "extras": [
+                "http2"
+            ],
              "hashes": [
                  "sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100",
                  "sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875"
@@ -714,6 +741,14 @@
              "markers": "python_version >= '3.8'",
              "version": "==4.8.0"
          },
+        "hyperframe": {
+            "hashes": [
+                "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15",
+                "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"
+            ],
+            "markers": "python_full_version >= '3.6.1'",
+            "version": "==6.0.1"
+        },
          "idna": {
              "hashes": [
                  "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
@@ -1782,7 +1817,7 @@
                  "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0",
                  "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"
              ],
-            "markers": "python_version < '3.11'",
+            "markers": "python_version < '3.10'",
              "version": "==4.8.0"
          },
          "tzdata": {
diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py

index bcfdd5b3d9bb45c36485e3dbe6013ec3c138535b..4b3e5686eb530326e9f2be38be4c748fde22985c 100644 (file)
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -1,13 +1,17 @@
  import re
  from html import escape
  from pathlib import Path
+from typing import Optional
  
-import httpx
  from bleach import clean
  from bleach import linkify
  from django.conf import settings
  from django.utils.timezone import is_naive
  from django.utils.timezone import make_aware
+from gotenberg_client import GotenbergClient
+from gotenberg_client.options import Margin
+from gotenberg_client.options import PageSize
+from gotenberg_client.options import PdfAFormat
  from humanize import naturalsize
  from imap_tools import MailAttachment
  from imap_tools import MailMessage
@@ -24,11 +28,22 @@ class MailDocumentParser(DocumentParser):
      Gotenberg and sends the html part to a Tika server for text extraction.
      """
  
-    gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
-    tika_server = settings.TIKA_ENDPOINT
-
      logging_name = "paperless.parsing.mail"
  
+    @staticmethod
+    def _settings_to_gotenberg_pdfa() -> Optional[PdfAFormat]:
+        """
+        Converts our requested PDF/A output into the Gotenberg API
+        format
+        """
+        if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
+            return PdfAFormat.A2b
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-1":  # pragma: no cover
+            return PdfAFormat.A1a
+        elif settings.OCR_OUTPUT_TYPE == "pdfa-3":  # pragma: no cover
+            return PdfAFormat.A3b
+        return None
+
      def get_thumbnail(self, document_path: Path, mime_type: str, file_name=None):
          if not self.archive_path:
              self.archive_path = self.generate_pdf(
@@ -173,7 +188,7 @@ class MailDocumentParser(DocumentParser):
          self.log.info("Sending content to Tika server")
  
          try:
-            with TikaClient(tika_url=self.tika_server) as client:
+            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
                  parsed = client.tika.as_text.from_buffer(html, "text/html")
  
                  if parsed.content is not None:
@@ -182,7 +197,7 @@ class MailDocumentParser(DocumentParser):
          except Exception as err:
              raise ParseError(
                  f"Could not parse content with tika server at "
-                f"{self.tika_server}: {err}",
+                f"{settings.TIKA_ENDPOINT}: {err}",
              ) from err
  
      def generate_pdf(self, mail_message: MailMessage) -> Path:
@@ -195,45 +210,29 @@ class MailDocumentParser(DocumentParser):
          if not mail_message.html:
              archive_path.write_bytes(mail_pdf_file.read_bytes())
          else:
-            url_merge = self.gotenberg_server + "/forms/pdfengines/merge"
-
              pdf_of_html_content = self.generate_pdf_from_html(
                  mail_message.html,
                  mail_message.attachments,
              )
  
-            pdf_collection = {
-                "1_mail.pdf": ("1_mail.pdf", mail_pdf_file, "application/pdf"),
-                "2_html.pdf": ("2_html.pdf", pdf_of_html_content, "application/pdf"),
-            }
-
-            try:
-                # Open a handle to each file, replacing the tuple
-                for filename in pdf_collection:
-                    file_multi_part = pdf_collection[filename]
-                    pdf_collection[filename] = (
-                        file_multi_part[0],
-                        file_multi_part[1].open("rb"),
-                        file_multi_part[2],
-                    )
-
-                response = httpx.post(
-                    url_merge,
-                    files=pdf_collection,
-                    timeout=settings.CELERY_TASK_TIME_LIMIT,
-                )
-                response.raise_for_status()  # ensure we notice bad responses
-
-                archive_path.write_bytes(response.content)
-
-            except Exception as err:
-                raise ParseError(
-                    f"Error while merging email HTML into PDF: {err}",
-                ) from err
-            finally:
-                for filename in pdf_collection:
-                    file_multi_part_handle = pdf_collection[filename][1]
-                    file_multi_part_handle.close()
+            with GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client, client.merge.merge() as route:
+                # Configure requested PDF/A formatting, if any
+                pdf_a_format = self._settings_to_gotenberg_pdfa()
+                if pdf_a_format is not None:
+                    route.pdf_format(pdf_a_format)
+
+                route.merge([mail_pdf_file, pdf_of_html_content])
+
+                try:
+                    response = route.run()
+                    archive_path.write_bytes(response.content)
+                except Exception as err:
+                    raise ParseError(
+                        f"Error while merging email HTML into PDF: {err}",
+                    ) from err
  
          return archive_path
  
@@ -299,48 +298,29 @@ class MailDocumentParser(DocumentParser):
          Creates a PDF based on the given email, using the email's values in a
          an HTML template
          """
-        url = self.gotenberg_server + "/forms/chromium/convert/html"
          self.log.info("Converting mail to PDF")
  
          css_file = Path(__file__).parent / "templates" / "output.css"
          email_html_file = self.mail_to_html(mail)
  
-        with css_file.open("rb") as css_handle, email_html_file.open(
-            "rb",
-        ) as email_html_handle:
-            files = {
-                "html": ("index.html", email_html_handle, "text/html"),
-                "css": ("output.css", css_handle, "text/css"),
-            }
-            headers = {}
-            data = {
-                "marginTop": "0.1",
-                "marginBottom": "0.1",
-                "marginLeft": "0.1",
-                "marginRight": "0.1",
-                "paperWidth": "8.27",
-                "paperHeight": "11.7",
-                "scale": "1.0",
-            }
-
-            # Set the output format of the resulting PDF
-            # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
-            if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
-                data["pdfFormat"] = "PDF/A-2b"
-            elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
-                data["pdfFormat"] = "PDF/A-1a"
-            elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
-                data["pdfFormat"] = "PDF/A-3b"
+        with GotenbergClient(
+            host=settings.TIKA_GOTENBERG_ENDPOINT,
+            timeout=settings.CELERY_TASK_TIME_LIMIT,
+        ) as client, client.chromium.html_to_pdf() as route:
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
  
              try:
-                response = httpx.post(
-                    url,
-                    files=files,
-                    headers=headers,
-                    data=data,
-                    timeout=settings.CELERY_TASK_TIME_LIMIT,
+                response = (
+                    route.index(email_html_file)
+                    .resource(css_file)
+                    .margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1))
+                    .size(PageSize(height=11.7, width=8.27))
+                    .scale(1.0)
+                    .run()
                  )
-                response.raise_for_status()  # ensure we notice bad responses
              except Exception as err:
                  raise ParseError(
                      f"Error while converting email to PDF: {err}",
@@ -368,69 +348,57 @@ class MailDocumentParser(DocumentParser):
              text = compiled_close.sub("</div", text)
              return text
  
-        url = self.gotenberg_server + "/forms/chromium/convert/html"
          self.log.info("Converting html to PDF")
  
          tempdir = Path(self.tempdir)
  
          html_clean = clean_html_script(orig_html)
-
-        files = {}
-
-        for attachment in attachments:
-            # Clean the attachment name to be valid
-            name_cid = f"cid:{attachment.content_id}"
-            name_clean = "".join(e for e in name_cid if e.isalnum())
-
-            # Write attachment payload to a temp file
-            temp_file = tempdir / name_clean
-            temp_file.write_bytes(attachment.payload)
-
-            # Store the attachment for upload
-            files[name_clean] = (name_clean, temp_file, attachment.content_type)
-
-            # Replace as needed the name with the clean name
-            html_clean = html_clean.replace(name_cid, name_clean)
-
-        # Now store the cleaned up HTML version
          html_clean_file = tempdir / "index.html"
          html_clean_file.write_text(html_clean)
  
-        files["index.html"] = ("index.html", html_clean_file, "text/html")
-
-        data = {
-            "marginTop": "0.1",
-            "marginBottom": "0.1",
-            "marginLeft": "0.1",
-            "marginRight": "0.1",
-            "paperWidth": "8.27",
-            "paperHeight": "11.7",
-            "scale": "1.0",
-        }
-        try:
-            # Open a handle to each file, replacing the tuple
-            for filename in files:
-                file_multi_part = files[filename]
-                files[filename] = (
-                    file_multi_part[0],
-                    file_multi_part[1].open("rb"),
-                    file_multi_part[2],
-                )
+        with GotenbergClient(
+            host=settings.TIKA_GOTENBERG_ENDPOINT,
+            timeout=settings.CELERY_TASK_TIME_LIMIT,
+        ) as client, client.chromium.html_to_pdf() as route:
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            # Add attachments as resources, cleaning the filename and replacing
+            # it in the index file for inclusion
+            for attachment in attachments:
+                # Clean the attachment name to be valid
+                name_cid = f"cid:{attachment.content_id}"
+                name_clean = "".join(e for e in name_cid if e.isalnum())
+
+                # Write attachment payload to a temp file
+                temp_file = tempdir / name_clean
+                temp_file.write_bytes(attachment.payload)
+
+                route.resource(temp_file)
+
+                # Replace as needed the name with the clean name
+                html_clean = html_clean.replace(name_cid, name_clean)
+
+            # Now store the cleaned up HTML version
+            html_clean_file = tempdir / "index.html"
+            html_clean_file.write_text(html_clean)
+            # This is our index file, the main page basically
+            route.index(html_clean_file)
+
+            # Set page size, margins
+            route.margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1)).size(
+                PageSize(height=11.7, width=8.27),
+            ).scale(1.0)
  
-            response = httpx.post(
-                url,
-                files=files,
-                data=data,
-                timeout=settings.CELERY_TASK_TIME_LIMIT,
-            )
-            response.raise_for_status()  # ensure we notice bad responses
-        except Exception as err:
-            raise ParseError(f"Error while converting document to PDF: {err}") from err
-        finally:
-            # Ensure all file handles as closed
-            for filename in files:
-                file_multi_part_handle = files[filename][1]
-                file_multi_part_handle.close()
+            try:
+                response = route.run()
+
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
  
          html_pdf = tempdir / "html.pdf"
          html_pdf.write_bytes(response.content)
diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py

index 41787adda16b242f833d38bfaadb2982b4acd043..5bcff19f643eb1a15b9548fed2556b859b163e8c 100644 (file)
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless_mail/tests/test_parsers.py
@@ -341,7 +341,7 @@ class TestTikaHtmlParse(HttpxMockMixin, BaseMailParserTestCase):
          )
          parsed = self.parser.tika_parse(html)
          self.assertEqual(expected_text, parsed.strip())
-        self.assertIn(self.parser.tika_server, str(self.httpx_mock.get_request().url))
+        self.assertIn("http://localhost:9998", str(self.httpx_mock.get_request().url))
  
      def test_tika_parse_exception(self):
          """
@@ -653,5 +653,5 @@ class TestParser(FileSystemAssertsMixin, HttpxMockMixin, BaseMailParserTestCase)
  
          self.assertEqual(
              str(request.url),
-            self.parser.gotenberg_server + "/forms/chromium/convert/html",
+            "http://localhost:3000/forms/chromium/convert/html",
          )
diff --git a/src/paperless_mail/tests/test_parsers_live.py b/src/paperless_mail/tests/test_parsers_live.py

index c58c1dfbcf60b077adcae342a979397a5537c5be..3260725a5bb7e0c52bf7fa28591314e5e8b9632c 100644 (file)
--- a/src/paperless_mail/tests/test_parsers_live.py
+++ b/src/paperless_mail/tests/test_parsers_live.py
@@ -1,11 +1,14 @@
  import os
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
  from unittest import mock
  
  import httpx
  import pytest
  from django.test import TestCase
  from imagehash import average_hash
-from pdfminer.high_level import extract_text
  from PIL import Image
  
  from documents.tests.utils import FileSystemAssertsMixin
@@ -13,6 +16,29 @@ from documents.tests.utils import util_call_with_backoff
  from paperless_mail.tests.test_parsers import BaseMailParserTestCase
  
  
+def extract_text(pdf_path: Path) -> str:
+    """
+    Using pdftotext from poppler, extracts the text of a PDF into a file,
+    then reads the file contents and returns it
+    """
+    with tempfile.NamedTemporaryFile(
+        mode="w+",
+    ) as tmp:
+        subprocess.run(
+            [
+                shutil.which("pdftotext"),
+                "-q",
+                "-layout",
+                "-enc",
+                "UTF-8",
+                str(pdf_path),
+                tmp.name,
+            ],
+            check=True,
+        )
+        return tmp.read()
+
+
  class MailAttachmentMock:
      def __init__(self, payload, content_id):
          self.payload = payload
@@ -150,7 +176,7 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
  
          extracted = extract_text(pdf_path)
          expected = (
-            "first\tPDF\tto\tbe\tmerged.\n\n\x0csecond\tPDF\tto\tbe\tmerged.\n\n\x0c"
+            "first   PDF   to   be   merged.\n\x0csecond PDF   to   be   merged.\n\x0c"
          )
  
          self.assertEqual(expected, extracted)
diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py

index c410594bb8260aff2126bde001eda0f50aa51a88..c9056d90df84e50da6d1ef1d01f2cd9319a13cf6 100644 (file)
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -1,9 +1,10 @@
-import os
  from pathlib import Path
  
  import httpx
  from django.conf import settings
  from django.utils import timezone
+from gotenberg_client import GotenbergClient
+from gotenberg_client.options import PdfAFormat
  from tika_client import TikaClient
  
  from documents.parsers import DocumentParser
@@ -80,47 +81,33 @@ class TikaDocumentParser(DocumentParser):
  
          self.archive_path = self.convert_to_pdf(document_path, file_name)
  
-    def convert_to_pdf(self, document_path, file_name):
-        pdf_path = os.path.join(self.tempdir, "convert.pdf")
-        gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
-        url = gotenberg_server + "/forms/libreoffice/convert"
+    def convert_to_pdf(self, document_path: Path, file_name):
+        pdf_path = Path(self.tempdir) / "convert.pdf"
  
          self.log.info(f"Converting {document_path} to PDF as {pdf_path}")
-        with open(document_path, "rb") as document_handle:
-            files = {
-                "files": (
-                    "convert" + os.path.splitext(document_path)[-1],
-                    document_handle,
-                ),
-            }
-            headers = {}
-            data = {}
  
+        with GotenbergClient(
+            host=settings.TIKA_GOTENBERG_ENDPOINT,
+            timeout=settings.CELERY_TASK_TIME_LIMIT,
+        ) as client, client.libre_office.to_pdf() as route:
              # Set the output format of the resulting PDF
-            # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
              if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
-                data["pdfFormat"] = "PDF/A-2b"
+                route.pdf_format(PdfAFormat.A2b)
              elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
-                data["pdfFormat"] = "PDF/A-1a"
+                route.pdf_format(PdfAFormat.A1a)
              elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
-                data["pdfFormat"] = "PDF/A-3b"
+                route.pdf_format(PdfAFormat.A3b)
+
+            route.convert(document_path)
  
              try:
-                response = httpx.post(
-                    url,
-                    files=files,
-                    headers=headers,
-                    data=data,
-                    timeout=settings.CELERY_TASK_TIME_LIMIT,
-                )
-                response.raise_for_status()  # ensure we notice bad responses
+                response = route.run()
+
+                pdf_path.write_bytes(response.content)
+
+                return pdf_path
+
              except Exception as err:
                  raise ParseError(
                      f"Error while converting document to PDF: {err}",
                  ) from err
-
-        with open(pdf_path, "wb") as file:
-            file.write(response.content)
-            file.close()
-
-        return pdf_path
diff --git a/src/paperless_tika/tests/test_tika_parser.py b/src/paperless_tika/tests/test_tika_parser.py

index f693aa4e7a9c1df6de0345c577cf2fda63505745..81d6f026af945445803914ec6699901f4f23b94c 100644 (file)
--- a/src/paperless_tika/tests/test_tika_parser.py
+++ b/src/paperless_tika/tests/test_tika_parser.py
@@ -2,12 +2,11 @@ import datetime
  import os
  import zoneinfo
  from pathlib import Path
-from unittest import mock
  
  from django.test import TestCase
  from django.test import override_settings
-from httpx import Request
-from httpx import Response
+from httpx import codes
+from httpx._multipart import DataField
  from rest_framework import status
  
  from documents.parsers import ParseError
@@ -95,8 +94,7 @@ class TestTikaParser(HttpxMockMixin, TestCase):
          with self.assertRaises(ParseError):
              self.parser.convert_to_pdf(file, None)
  
-    @mock.patch("paperless_tika.parsers.httpx.post")
-    def test_request_pdf_a_format(self, post: mock.Mock):
+    def test_request_pdf_a_format(self):
          """
          GIVEN:
              - Document needs to be converted to PDF
@@ -108,10 +106,6 @@ class TestTikaParser(HttpxMockMixin, TestCase):
          file = Path(os.path.join(self.parser.tempdir, "input.odt"))
          file.touch()
  
-        response = Response(status_code=status.HTTP_200_OK)
-        response.request = Request("POST", "/somewhere/")
-        post.return_value = response
-
          for setting, expected_key in [
              ("pdfa", "PDF/A-2b"),
              ("pdfa-2", "PDF/A-2b"),
@@ -119,11 +113,20 @@ class TestTikaParser(HttpxMockMixin, TestCase):
              ("pdfa-3", "PDF/A-3b"),
          ]:
              with override_settings(OCR_OUTPUT_TYPE=setting):
-                self.parser.convert_to_pdf(file, None)
+                self.httpx_mock.add_response(
+                    status_code=codes.OK,
+                    content=b"PDF document",
+                    method="POST",
+                )
  
-                post.assert_called_once()
-                _, kwargs = post.call_args
+                self.parser.convert_to_pdf(file, None)
  
-                self.assertEqual(kwargs["data"]["pdfFormat"], expected_key)
+                request = self.httpx_mock.get_request()
+                found = False
+                for field in request.stream.fields:
+                    if isinstance(field, DataField) and field.name == "pdfFormat":
+                        self.assertEqual(field.value, expected_key)
+                        found = True
+                self.assertTrue(found)
  
-                post.reset_mock()
+                self.httpx_mock.reset(assert_all_responses_were_requested=False)
diff --git a/src/setup.cfg b/src/setup.cfg

index fb6ecf3158ad025755b9f681365d09587cdb6455..e2e5cf8ea0558ce31b29a871cdad11ab6760e2d5 100644 (file)
--- a/src/setup.cfg
+++ b/src/setup.cfg
@@ -7,7 +7,7 @@ max-line-length = 88
  
  [tool:pytest]
  DJANGO_SETTINGS_MODULE=paperless.settings
-addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --quiet --durations=50
+addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
  env =
    PAPERLESS_DISABLE_DBHANDLER=true
author	Trenton H <797416+stumpylog@users.noreply.github.com>
	Fri, 20 Oct 2023 00:27:29 +0000 (17:27 -0700)
committer	GitHub <noreply@github.com>
	Fri, 20 Oct 2023 00:27:29 +0000 (00:27 +0000)
Pipfile		patch | blob | blame | history
Pipfile.lock		patch | blob | blame | history
src/paperless_mail/parsers.py		patch | blob | blame | history
src/paperless_mail/tests/test_parsers.py		patch | blob | blame | history
src/paperless_mail/tests/test_parsers_live.py		patch | blob | blame | history
src/paperless_tika/parsers.py		patch | blob | blame | history
src/paperless_tika/tests/test_tika_parser.py		patch | blob | blame | history
src/setup.cfg		patch | blob | blame | history