]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: Convert search dates to UTC in advanced search (#4891)
authorAdam Bogdał <adam@bogdal.pl>
Mon, 11 Dec 2023 17:32:43 +0000 (18:32 +0100)
committerGitHub <noreply@github.com>
Mon, 11 Dec 2023 17:32:43 +0000 (09:32 -0800)
* Index documents using local timezone

* Add local date parser

src/documents/index.py
src/documents/tests/test_api.py

index 2e25850712113b09894209a9014e51ea4cb96417..da5168b9a0e4fb80325fb0a0f164118e03d6eaa5 100644 (file)
@@ -25,9 +25,11 @@ from whoosh.index import open_dir
 from whoosh.qparser import MultifieldParser
 from whoosh.qparser import QueryParser
 from whoosh.qparser.dateparse import DateParserPlugin
+from whoosh.qparser.dateparse import English
 from whoosh.scoring import TF_IDF
 from whoosh.searching import ResultsPage
 from whoosh.searching import Searcher
+from whoosh.util.times import timespan
 from whoosh.writing import AsyncWriter
 
 # from documents.models import CustomMetadata
@@ -356,6 +358,22 @@ class DelayedQuery:
         return page
 
 
+class LocalDateParser(English):
+    def reverse_timezone_offset(self, d):
+        return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
+            timezone.utc,
+        )
+
+    def date_from(self, *args, **kwargs):
+        d = super().date_from(*args, **kwargs)
+        if isinstance(d, timespan):
+            d.start = self.reverse_timezone_offset(d.start)
+            d.end = self.reverse_timezone_offset(d.end)
+        else:
+            d = self.reverse_timezone_offset(d)
+        return d
+
+
 class DelayedFullTextQuery(DelayedQuery):
     def _get_query(self):
         q_str = self.query_params["query"]
@@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery):
             ],
             self.searcher.ixreader.schema,
         )
-        qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
+        qp.add_plugin(
+            DateParserPlugin(
+                basedate=timezone.now(),
+                dateparser=LocalDateParser(),
+            ),
+        )
         q = qp.parse(q_str)
 
         corrected = self.searcher.correct_query(q, q_str)
index e671ce2cec22e2d4ce08348033725c95e95efd5e..c2ade1d453ffc08c1c3043906320c29d7316e029 100644 (file)
@@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
             # Assert subset in results
             self.assertDictEqual(result, {**result, **subset})
 
+    @override_settings(
+        TIME_ZONE="Europe/Sofia",
+    )
+    def test_search_added_specific_date_with_timezone_ahead(self):
+        """
+        GIVEN:
+            - Two documents added right now
+            - One document added on a specific date
+            - The timezone is ahead of UTC time (+2)
+        WHEN:
+            - Query for documents added on a specific date
+        THEN:
+            - The one document is returned
+        """
+        # d1 and d2 are created without an explicit "added" value
+        d1 = Document.objects.create(
+            title="invoice",
+            content="the thing i bought at a shop and paid with bank account",
+            checksum="A",
+            pk=1,
+        )
+        d2 = Document.objects.create(
+            title="bank statement 1",
+            content="things i paid for in august",
+            pk=2,
+            checksum="B",
+        )
+        d3 = Document.objects.create(
+            title="bank statement 3",
+            content="things i paid for in september",
+            pk=3,
+            checksum="C",
+            # midnight on 2023-12-01; make_aware() is given no explicit timezone,
+            # so the current one (Europe/Sofia under this override) is attached
+            added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
+        )
+        # refresh doc instance to ensure we operate on date objects that Django uses
+        # Django converts dates to UTC
+        d3.refresh_from_db()
+
+        # Index all three documents so the search can see them
+        with index.open_index_writer() as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        # 20231201 is d3's "added" date in local time; only d3 should match
+        response = self.client.get("/api/documents/?query=added:20231201")
+        results = response.data["results"]
+
+        # Expect 1 document returned
+        self.assertEqual(len(results), 1)
+
+        for idx, subset in enumerate(
+            [{"id": 3, "title": "bank statement 3"}],
+        ):
+            result = results[idx]
+            # Assert subset in results: merging subset into result changes nothing
+            # only when every expected key/value is already present
+            self.assertDictEqual(result, {**result, **subset})
+
     def test_search_added_in_last_month(self):
         """
         GIVEN: