]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fixes relative date Whoosh queries and adds testing to ensure it remains working...
authorTrenton H <797416+stumpylog@users.noreply.github.com>
Tue, 31 Jan 2023 17:54:11 +0000 (09:54 -0800)
committerTrenton H <797416+stumpylog@users.noreply.github.com>
Tue, 31 Jan 2023 20:53:46 +0000 (12:53 -0800)
src/documents/index.py
src/documents/tests/test_api.py

index 7add84bb71eca2a295274ce82717de7c4cb1d17e..575e57e8be1dfbe3cddac7539e6d1b1677e1c00b 100644 (file)
@@ -5,6 +5,7 @@ from contextlib import contextmanager
 
 from dateutil.parser import isoparse
 from django.conf import settings
+from django.utils import timezone
 from documents.models import Comment
 from documents.models import Document
 from whoosh import classify
@@ -262,7 +263,7 @@ class DelayedFullTextQuery(DelayedQuery):
             ["content", "title", "correspondent", "tag", "type", "comments"],
             self.searcher.ixreader.schema,
         )
-        qp.add_plugin(DateParserPlugin())
+        qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
         q = qp.parse(q_str)
 
         corrected = self.searcher.correct_query(q, q_str)
index 4547288d2a4d32b52a9098b1f4e119eb8f7b715f..636bfb0e599243965890f2a457b6564d5fc5a93e 100644 (file)
@@ -7,6 +7,7 @@ import tempfile
 import urllib.request
 import uuid
 import zipfile
+from datetime import timedelta
 from pathlib import Path
 from unittest import mock
 from unittest.mock import MagicMock
@@ -23,6 +24,7 @@ from django.conf import settings
 from django.contrib.auth.models import User
 from django.test import override_settings
 from django.utils import timezone
+from dateutil.relativedelta import relativedelta
 from documents import bulk_edit
 from documents import index
 from documents.models import Correspondent
@@ -505,6 +507,270 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
         self.assertEqual(response.status_code, 404)
 
+    @override_settings(
+        TIME_ZONE="UTC",
+    )
+    def test_search_added_in_last_week(self):
+        """
+        GIVEN:
+            - Three documents added right now
+            - The timezone is UTC time
+        WHEN:
+            - Query for documents added in the last 7 days
+        THEN:
+            - The two recent documents are returned
+        """
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+        )
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
+        results = response.data["results"]
+        # Expect 3 documents returned
+        self.assertEqual(len(results), 3)
+
+        for idx, subset in enumerate(
+            [
+                {"id": 1, "title": "invoice"},
+                {"id": 2, "title": "bank statement 1"},
+                {"id": 3, "title": "bank statement 3"},
+            ],
+        ):
+            result = results[idx]
+            # Assert subset in results
+            self.assertDictEqual(result, {**result, **subset})
+
+    @override_settings(
+        TIME_ZONE="America/Chicago",
+    )
+    def test_search_added_in_last_week_with_timezone_behind(self):
+        """
+        GIVEN:
+            - Two documents added right now
+            - One document added over a week ago
+            - The timezone is behind UTC time (-6)
+        WHEN:
+            - Query for documents added in the last 7 days
+        THEN:
+            - The two recent documents are returned
+        """
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+            # 7 days, 1 hour and 1 minute ago
+            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
+        )
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
+        results = response.data["results"]
+
+        # Expect 2 documents returned
+        self.assertEqual(len(results), 2)
+
+        for idx, subset in enumerate(
+            [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
+        ):
+            result = results[idx]
+            # Assert subset in results
+            self.assertDictEqual(result, {**result, **subset})
+
+    @override_settings(
+        TIME_ZONE="Europe/Sofia",
+    )
+    def test_search_added_in_last_week_with_timezone_ahead(self):
+        """
+        GIVEN:
+            - Two documents added right now
+            - One document added over a week ago
+            - The timezone is behind UTC time (+2)
+        WHEN:
+            - Query for documents added in the last 7 days
+        THEN:
+            - The two recent documents are returned
+        """
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+            # 7 days, 1 hour and 1 minute ago
+            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
+        )
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get("/api/documents/?query=added:[-1 week to now]")
+        results = response.data["results"]
+
+        # Expect 2 documents returned
+        self.assertEqual(len(results), 2)
+
+        for idx, subset in enumerate(
+            [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
+        ):
+            result = results[idx]
+            # Assert subset in results
+            self.assertDictEqual(result, {**result, **subset})
+
+    def test_search_added_in_last_month(self):
+        """
+        GIVEN:
+            - One document added right now
+            - One documents added about a week ago
+            - One document added over 1 month
+        WHEN:
+            - Query for documents added in the last month
+        THEN:
+            - The two recent documents are returned
+        """
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+            # 1 month, 1 day ago
+            added=timezone.now() - relativedelta(months=1, days=1),
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+            # 7 days, 1 hour and 1 minute ago
+            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
+        )
+
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get("/api/documents/?query=added:[-1 month to now]")
+        results = response.data["results"]
+
+        # Expect 2 documents returned
+        self.assertEqual(len(results), 2)
+
+        for idx, subset in enumerate(
+            [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
+        ):
+            result = results[idx]
+            # Assert subset in results
+            self.assertDictEqual(result, {**result, **subset})
+
+    @override_settings(
+        TIME_ZONE="America/Denver",
+    )
+    def test_search_added_in_last_month_timezone_behind(self):
+        """
+        GIVEN:
+            - One document added right now
+            - One documents added about a week ago
+            - One document added over 1 month
+            - The timezone is behind UTC time (-6 or -7)
+        WHEN:
+            - Query for documents added in the last month
+        THEN:
+            - The two recent documents are returned
+        """
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+            # 1 month, 1 day ago
+            added=timezone.now() - relativedelta(months=1, days=1),
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+            # 7 days, 1 hour and 1 minute ago
+            added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
+        )
+
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get("/api/documents/?query=added:[-1 month to now]")
+        results = response.data["results"]
+
+        # Expect 2 documents returned
+        self.assertEqual(len(results), 2)
+
+        for idx, subset in enumerate(
+            [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
+        ):
+            result = results[idx]
+            # Assert subset in results
+            self.assertDictEqual(result, {**result, **subset})
+
     @mock.patch("documents.index.autocomplete")
     def test_search_autocomplete(self, m):
         m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]