git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Test
author shamoon <4887959+shamoon@users.noreply.github.com>
Fri, 18 Apr 2025 20:08:10 +0000 (13:08 -0700)
committer shamoon <4887959+shamoon@users.noreply.github.com>
Tue, 8 Jul 2025 21:19:43 +0000 (14:19 -0700)
src/paperless_remote/tests/test_parser.py

index 0bc046037d922f771d278ccd4c20edd30de49fe3..0a775344910bddafff3be387e07ab431bb27fafb 100644 (file)
@@ -1,9 +1,7 @@
-import sys
 import uuid
 from pathlib import Path
 from unittest import mock
 
-import pytest
 from django.test import TestCase
 from django.test import override_settings
 
@@ -25,59 +23,37 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
                 self.fail(f"'{s}' is not in '{content}'")
         self.assertListEqual(indices, sorted(indices))
 
-    @pytest.mark.skipif(
-        sys.version_info > (3, 10),
-        reason="Fails on 3.11 only on CI, for some reason",
-    )  # TODO: investigate
-    @mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient")
-    def test_get_text_with_azure(self, mock_azure_client):
-        result = mock.Mock()
-        result.content = "This is a test document."
-        result.pages = [
-            mock.Mock(
-                width=100,
-                height=100,
-                words=[
-                    mock.Mock(
-                        content="This",
-                        polygon=[
-                            mock.Mock(x=0, y=0),
-                        ],
-                    ),
-                    mock.Mock(
-                        content="is",
-                        polygon=[
-                            mock.Mock(x=10, y=10),
-                        ],
-                    ),
-                    mock.Mock(
-                        content="a",
-                        polygon=[
-                            mock.Mock(x=20, y=20),
-                        ],
-                    ),
-                    mock.Mock(
-                        content="test",
-                        polygon=[
-                            mock.Mock(x=30, y=30),
-                        ],
-                    ),
-                    mock.Mock(
-                        content="document.",
-                        polygon=[
-                            mock.Mock(x=40, y=40),
-                        ],
-                    ),
-                ],
-            ),
+    @mock.patch("paperless_remote.parsers.subprocess.run")
+    @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
+    def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
+        # Arrange mock Azure client
+        mock_client = mock.Mock()
+        mock_client_cls.return_value = mock_client
+
+        # Simulate poller result and its `.details`
+        mock_poller = mock.Mock()
+        mock_poller.wait.return_value = None
+        mock_poller.details = {"operation_id": "fake-op-id"}
+        mock_client.begin_analyze_document.return_value = mock_poller
+
+        # Return dummy PDF bytes
+        mock_client.get_analyze_result_pdf.return_value = [
+            b"%PDF-",
+            b"1.7 ",
+            b"FAKEPDF",
         ]
 
-        mock_azure_client.return_value.begin_analyze_document.return_value.result.return_value = result
+        # Simulate pdftotext by writing dummy text to sidecar file
+        def fake_run(cmd, *args, **kwargs):
+            with Path(cmd[-1]).open("w", encoding="utf-8") as f:
+                f.write("This is a test document.")
+
+        mock_subprocess.side_effect = fake_run
 
         with override_settings(
             REMOTE_OCR_ENGINE="azureai",
             REMOTE_OCR_API_KEY="somekey",
-            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
+            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
         ):
             parser = RemoteDocumentParser(uuid.uuid4())
             parser.parse(