from whoosh.qparser import MultifieldParser
from whoosh.qparser import QueryParser
from whoosh.qparser.dateparse import DateParserPlugin
+from whoosh.qparser.dateparse import English
from whoosh.scoring import TF_IDF
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
+from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter
# from documents.models import CustomMetadata
return page
+class LocalDateParser(English):
+    """
+    English date parser whose results are treated as local time.
+
+    Dates produced by the parent parser are stamped with the current Django
+    time zone and then converted to UTC before being returned.
+    """
+
+    def reverse_timezone_offset(self, d):
+        """
+        Read the datetime `d` as wall-clock time in the current Django time
+        zone and return the same instant expressed in UTC.
+        """
+        return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
+            timezone.utc,
+        )
+
+    def date_from(self, *args, **kwargs):
+        """
+        Parse a date exactly like the parent class, then convert it to UTC.
+
+        A timespan result has both of its endpoints converted in place; any
+        other non-None result is converted as a single datetime. None is
+        returned unchanged.
+        """
+        d = super().date_from(*args, **kwargs)
+        if isinstance(d, timespan):
+            d.start = self.reverse_timezone_offset(d.start)
+            d.end = self.reverse_timezone_offset(d.end)
+        elif d is not None:
+            # The parent parser can hand back None for text it could not
+            # parse. Pass that through so the caller can report the bad
+            # date instead of crashing on None.replace().
+            d = self.reverse_timezone_offset(d)
+        return d
+
+
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self):
q_str = self.query_params["query"]
],
self.searcher.ixreader.schema,
)
- qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
+ qp.add_plugin(
+ DateParserPlugin(
+ basedate=timezone.now(),
+ dateparser=LocalDateParser(),
+ ),
+ )
q = qp.parse(q_str)
corrected = self.searcher.correct_query(q, q_str)
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})
+ @override_settings(
+ TIME_ZONE="Europe/Sofia",
+ )
+ def test_search_added_specific_date_with_timezone_ahead(self):
+ """
+ GIVEN:
+ - Two documents added right now
+ - One document added on a specific date
+ - The timezone is ahead of UTC time (+2)
+ WHEN:
+ - Query for documents added on a specific date
+ THEN:
+ - The one document is returned
+ """
+ # d1 and d2 get no explicit "added" value; only d3 is pinned to a date
+ d1 = Document.objects.create(
+ title="invoice",
+ content="the thing i bought at a shop and paid with bank account",
+ checksum="A",
+ pk=1,
+ )
+ d2 = Document.objects.create(
+ title="bank statement 1",
+ content="things i paid for in august",
+ pk=2,
+ checksum="B",
+ )
+ d3 = Document.objects.create(
+ title="bank statement 3",
+ content="things i paid for in september",
+ pk=3,
+ checksum="C",
+ # specific time zone aware date
+ added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
+ )
+ # refresh doc instance to ensure we operate on date objects that Django uses
+ # Django converts dates to UTC
+ d3.refresh_from_db()
+
+ # Index all three documents so the search below can see them
+ with index.open_index_writer() as writer:
+ index.update_document(writer, d1)
+ index.update_document(writer, d2)
+ index.update_document(writer, d3)
+
+ # Search by the same calendar date that d3 was created with
+ response = self.client.get("/api/documents/?query=added:20231201")
+ results = response.data["results"]
+
+ # Expect 1 document returned
+ self.assertEqual(len(results), 1)
+
+ for idx, subset in enumerate(
+ [{"id": 3, "title": "bank statement 3"}],
+ ):
+ result = results[idx]
+ # Assert subset in results
+ self.assertDictEqual(result, {**result, **subset})
+
def test_search_added_in_last_month(self):
"""
GIVEN: