-import sys
import uuid
from pathlib import Path
from unittest import mock
-import pytest
from django.test import TestCase
from django.test import override_settings
self.fail(f"'{s}' is not in '{content}'")
self.assertListEqual(indices, sorted(indices))
- @pytest.mark.skipif(
- sys.version_info > (3, 10),
- reason="Fails on 3.11 only on CI, for some reason",
- ) # TODO: investigate
- @mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient")
- def test_get_text_with_azure(self, mock_azure_client):
- result = mock.Mock()
- result.content = "This is a test document."
- result.pages = [
- mock.Mock(
- width=100,
- height=100,
- words=[
- mock.Mock(
- content="This",
- polygon=[
- mock.Mock(x=0, y=0),
- ],
- ),
- mock.Mock(
- content="is",
- polygon=[
- mock.Mock(x=10, y=10),
- ],
- ),
- mock.Mock(
- content="a",
- polygon=[
- mock.Mock(x=20, y=20),
- ],
- ),
- mock.Mock(
- content="test",
- polygon=[
- mock.Mock(x=30, y=30),
- ],
- ),
- mock.Mock(
- content="document.",
- polygon=[
- mock.Mock(x=40, y=40),
- ],
- ),
- ],
- ),
+ @mock.patch("paperless_remote.parsers.subprocess.run")
+ @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
+ def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
+ # Arrange mock Azure client
+ mock_client = mock.Mock()
+ mock_client_cls.return_value = mock_client
+
+ # Simulate poller result and its `.details`
+ mock_poller = mock.Mock()
+ mock_poller.wait.return_value = None
+ mock_poller.details = {"operation_id": "fake-op-id"}
+ mock_client.begin_analyze_document.return_value = mock_poller
+
+ # Return dummy PDF bytes
+ mock_client.get_analyze_result_pdf.return_value = [
+ b"%PDF-",
+ b"1.7 ",
+ b"FAKEPDF",
]
- mock_azure_client.return_value.begin_analyze_document.return_value.result.return_value = result
+ # Simulate pdftotext by writing dummy text to sidecar file
+ def fake_run(cmd, *args, **kwargs):
+ with Path(cmd[-1]).open("w", encoding="utf-8") as f:
+ f.write("This is a test document.")
+
+ mock_subprocess.side_effect = fake_run
with override_settings(
REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey",
- REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
+ REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
):
parser = RemoteDocumentParser(uuid.uuid4())
parser.parse(