From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:26:33 +0000 (-0700) Subject: Fix (dev): retain backwards compatibility with natural-date keywords in tantivy ... X-Git-Url: http://git.ipfire.org/index.cgi?a=commitdiff_plain;h=HEAD;p=thirdparty%2Fpaperless-ngx.git Fix (dev): retain backwards compatibility with natural-date keywords in tantivy (#12602) --- diff --git a/docs/usage.md b/docs/usage.md index e8b884251..98eceb22a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -855,13 +855,14 @@ Matching natural date keywords: ``` added:today modified:yesterday -created:this_week -added:last_month -modified:this_year +created:"previous week" +added:"previous month" +modified:"this year" ``` -Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`, -`this_month`, `last_month`, `this_year`, `last_year`. +Supported date keywords: `today`, `yesterday`, `previous week`, +`this month`, `previous month`, `this year`, `previous year`, +`previous quarter`. 
#### Searching custom fields diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py index ed0bb4c15..1bd31b804 100644 --- a/src/documents/search/_query.py +++ b/src/documents/search/_query.py @@ -25,21 +25,39 @@ _REGEX_TIMEOUT: Final[float] = 1.0 _DATE_ONLY_FIELDS = frozenset({"created"}) +_TODAY: Final[str] = "today" +_YESTERDAY: Final[str] = "yesterday" +_PREVIOUS_WEEK: Final[str] = "previous week" +_THIS_MONTH: Final[str] = "this month" +_PREVIOUS_MONTH: Final[str] = "previous month" +_THIS_YEAR: Final[str] = "this year" +_PREVIOUS_YEAR: Final[str] = "previous year" +_PREVIOUS_QUARTER: Final[str] = "previous quarter" + _DATE_KEYWORDS = frozenset( { - "today", - "yesterday", - "this_week", - "last_week", - "this_month", - "last_month", - "this_year", - "last_year", + _TODAY, + _YESTERDAY, + _PREVIOUS_WEEK, + _THIS_MONTH, + _PREVIOUS_MONTH, + _THIS_YEAR, + _PREVIOUS_YEAR, + _PREVIOUS_QUARTER, }, ) +_DATE_KEYWORD_PATTERN = "|".join( + sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True), +) + _FIELD_DATE_RE = regex.compile( - r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b", + rf"""(?P<field>\w+)\s*:\s*(?: + (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote) + | + (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-]) +)""", + regex.IGNORECASE | regex.VERBOSE, ) _COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b") _RELATIVE_RANGE_RE = regex.compile( @@ -74,44 +92,59 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str: today = datetime.now(tz).date() - if keyword == "today": + def _quarter_start(d: date) -> date: + return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1) + + if keyword == _TODAY: lo = datetime(today.year, today.month, today.day, tzinfo=UTC) return _iso_range(lo, lo + timedelta(days=1)) - if keyword == "yesterday": + if keyword == _YESTERDAY: y = today - timedelta(days=1) lo = datetime(y.year, y.month, y.day, tzinfo=UTC) hi = datetime(today.year, today.month, today.day, tzinfo=UTC) return _iso_range(lo, hi) - if keyword == "this_week": - mon 
= today - timedelta(days=today.weekday()) - lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC) - return _iso_range(lo, lo + timedelta(weeks=1)) - if keyword == "last_week": + if keyword == _PREVIOUS_WEEK: this_mon = today - timedelta(days=today.weekday()) last_mon = this_mon - timedelta(weeks=1) lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC) hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC) return _iso_range(lo, hi) - if keyword == "this_month": + if keyword == _THIS_MONTH: lo = datetime(today.year, today.month, 1, tzinfo=UTC) if today.month == 12: hi = datetime(today.year + 1, 1, 1, tzinfo=UTC) else: hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC) return _iso_range(lo, hi) - if keyword == "last_month": + if keyword == _PREVIOUS_MONTH: if today.month == 1: lo = datetime(today.year - 1, 12, 1, tzinfo=UTC) else: lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC) hi = datetime(today.year, today.month, 1, tzinfo=UTC) return _iso_range(lo, hi) - if keyword == "this_year": + if keyword == _THIS_YEAR: lo = datetime(today.year, 1, 1, tzinfo=UTC) return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC)) - if keyword == "last_year": + if keyword == _PREVIOUS_YEAR: lo = datetime(today.year - 1, 1, 1, tzinfo=UTC) return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC)) + if keyword == _PREVIOUS_QUARTER: + this_quarter = _quarter_start(today) + last_quarter = this_quarter - relativedelta(months=3) + lo = datetime( + last_quarter.year, + last_quarter.month, + last_quarter.day, + tzinfo=UTC, + ) + hi = datetime( + this_quarter.year, + this_quarter.month, + this_quarter.day, + tzinfo=UTC, + ) + return _iso_range(lo, hi) raise ValueError(f"Unknown keyword: {keyword}") @@ -127,42 +160,46 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str: def _midnight(d: date) -> datetime: return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC) - if keyword == "today": + def _quarter_start(d: date) -> 
date: + return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1) + + if keyword == _TODAY: return _iso_range(_midnight(today), _midnight(today + timedelta(days=1))) - if keyword == "yesterday": + if keyword == _YESTERDAY: y = today - timedelta(days=1) return _iso_range(_midnight(y), _midnight(today)) - if keyword == "this_week": - mon = today - timedelta(days=today.weekday()) - return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1))) - if keyword == "last_week": + if keyword == _PREVIOUS_WEEK: this_mon = today - timedelta(days=today.weekday()) last_mon = this_mon - timedelta(weeks=1) return _iso_range(_midnight(last_mon), _midnight(this_mon)) - if keyword == "this_month": + if keyword == _THIS_MONTH: first = today.replace(day=1) if today.month == 12: next_first = date(today.year + 1, 1, 1) else: next_first = date(today.year, today.month + 1, 1) return _iso_range(_midnight(first), _midnight(next_first)) - if keyword == "last_month": + if keyword == _PREVIOUS_MONTH: this_first = today.replace(day=1) if today.month == 1: last_first = date(today.year - 1, 12, 1) else: last_first = date(today.year, today.month - 1, 1) return _iso_range(_midnight(last_first), _midnight(this_first)) - if keyword == "this_year": + if keyword == _THIS_YEAR: return _iso_range( _midnight(date(today.year, 1, 1)), _midnight(date(today.year + 1, 1, 1)), ) - if keyword == "last_year": + if keyword == _PREVIOUS_YEAR: return _iso_range( _midnight(date(today.year - 1, 1, 1)), _midnight(date(today.year, 1, 1)), ) + if keyword == _PREVIOUS_QUARTER: + this_quarter = _quarter_start(today) + last_quarter = this_quarter - relativedelta(months=3) + return _iso_range(_midnight(last_quarter), _midnight(this_quarter)) raise ValueError(f"Unknown keyword: {keyword}") @@ -308,7 +345,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str: - Compact 14-digit dates (YYYYMMDDHHmmss) - Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h]) - 8-digit dates with field awareness 
(created:20240115) - - Natural keywords (field:today, field:last_week, etc.) + - Natural keywords (field:today, field:"previous quarter", etc.) Args: query: Raw user query string @@ -326,7 +363,8 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str: query = _rewrite_relative_range(query) def _replace(m: regex.Match[str]) -> str: - field, keyword = m.group(1), m.group(2) + field = m.group("field") + keyword = (m.group("quoted") or m.group("bare")).lower() if field in _DATE_ONLY_FIELDS: return f"{field}:{_date_only_range(keyword, tz)}" return f"{field}:{_datetime_range(keyword, tz)}" diff --git a/src/documents/tests/search/test_query.py b/src/documents/tests/search/test_query.py index 74a064dbb..e47d6b7df 100644 --- a/src/documents/tests/search/test_query.py +++ b/src/documents/tests/search/test_query.py @@ -81,45 +81,38 @@ class TestCreatedDateField: ), pytest.param( "created", - "this_week", - "2026-03-23T00:00:00Z", - "2026-03-30T00:00:00Z", - id="this_week_mon_sun", - ), - pytest.param( - "created", - "last_week", + "previous week", "2026-03-16T00:00:00Z", "2026-03-23T00:00:00Z", - id="last_week", + id="previous_week", ), pytest.param( "created", - "this_month", + "this month", "2026-03-01T00:00:00Z", "2026-04-01T00:00:00Z", id="this_month", ), pytest.param( "created", - "last_month", + "previous month", "2026-02-01T00:00:00Z", "2026-03-01T00:00:00Z", - id="last_month", + id="previous_month", ), pytest.param( "created", - "this_year", + "this year", "2026-01-01T00:00:00Z", "2027-01-01T00:00:00Z", id="this_year", ), pytest.param( "created", - "last_year", + "previous year", "2025-01-01T00:00:00Z", "2026-01-01T00:00:00Z", - id="last_year", + id="previous_year", ), ], ) @@ -141,7 +134,7 @@ class TestCreatedDateField: def test_this_month_december_wraps_to_next_year(self) -> None: # December: next month must roll over to January 1 of next year lo, hi = _range( - rewrite_natural_date_keywords("created:this_month", UTC), + 
rewrite_natural_date_keywords("created:this month", UTC), "created", ) assert lo == "2026-12-01T00:00:00Z" @@ -151,12 +144,21 @@ class TestCreatedDateField: def test_last_month_january_wraps_to_previous_year(self) -> None: # January: last month must roll back to December 1 of previous year lo, hi = _range( - rewrite_natural_date_keywords("created:last_month", UTC), + rewrite_natural_date_keywords("created:previous month", UTC), "created", ) assert lo == "2025-12-01T00:00:00Z" assert hi == "2026-01-01T00:00:00Z" + @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False) + def test_previous_quarter(self) -> None: + lo, hi = _range( + rewrite_natural_date_keywords('created:"previous quarter"', UTC), + "created", + ) + assert lo == "2026-04-01T00:00:00Z" + assert hi == "2026-07-01T00:00:00Z" + def test_unknown_keyword_raises(self) -> None: with pytest.raises(ValueError, match="Unknown keyword"): _date_only_range("bogus_keyword", UTC) @@ -202,40 +204,34 @@ class TestDateTimeFields: id="yesterday", ), pytest.param( - "this_week", - "2026-03-23T00:00:00Z", - "2026-03-30T00:00:00Z", - id="this_week", - ), - pytest.param( - "last_week", + "previous week", "2026-03-16T00:00:00Z", "2026-03-23T00:00:00Z", - id="last_week", + id="previous_week", ), pytest.param( - "this_month", + "this month", "2026-03-01T00:00:00Z", "2026-04-01T00:00:00Z", id="this_month", ), pytest.param( - "last_month", + "previous month", "2026-02-01T00:00:00Z", "2026-03-01T00:00:00Z", - id="last_month", + id="previous_month", ), pytest.param( - "this_year", + "this year", "2026-01-01T00:00:00Z", "2027-01-01T00:00:00Z", id="this_year", ), pytest.param( - "last_year", + "previous year", "2025-01-01T00:00:00Z", "2026-01-01T00:00:00Z", - id="last_year", + id="previous_year", ), ], ) @@ -254,17 +250,54 @@ class TestDateTimeFields: @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False) def test_this_month_december_wraps_to_next_year(self) -> None: # December: next month 
wraps to January of next year - lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added") + lo, hi = _range(rewrite_natural_date_keywords("added:this month", UTC), "added") assert lo == "2026-12-01T00:00:00Z" assert hi == "2027-01-01T00:00:00Z" @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False) def test_last_month_january_wraps_to_previous_year(self) -> None: # January: last month wraps back to December of previous year - lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added") + lo, hi = _range( + rewrite_natural_date_keywords("added:previous month", UTC), + "added", + ) assert lo == "2025-12-01T00:00:00Z" assert hi == "2026-01-01T00:00:00Z" + @pytest.mark.parametrize( + ("query", "expected_lo", "expected_hi"), + [ + pytest.param( + 'added:"previous quarter"', + "2026-04-01T00:00:00Z", + "2026-07-01T00:00:00Z", + id="quoted_previous_quarter", + ), + pytest.param( + "added:previous month", + "2026-06-01T00:00:00Z", + "2026-07-01T00:00:00Z", + id="bare_previous_month", + ), + pytest.param( + "added:this month", + "2026-07-01T00:00:00Z", + "2026-08-01T00:00:00Z", + id="bare_this_month", + ), + ], + ) + @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False) + def test_legacy_natural_language_aliases( + self, + query: str, + expected_lo: str, + expected_hi: str, + ) -> None: + lo, hi = _range(rewrite_natural_date_keywords(query, UTC), "added") + assert lo == expected_lo + assert hi == expected_hi + def test_unknown_keyword_raises(self) -> None: with pytest.raises(ValueError, match="Unknown keyword"): _datetime_range("bogus_keyword", UTC) diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py index 85f479010..50fff3dbb 100644 --- a/src/documents/tests/test_api_search.py +++ b/src/documents/tests/test_api_search.py @@ -3,6 +3,7 @@ from datetime import timedelta from unittest import mock import pytest +import time_machine from 
dateutil.relativedelta import relativedelta from django.contrib.auth.models import Group from django.contrib.auth.models import Permission @@ -26,6 +27,7 @@ from documents.models import Tag from documents.models import Workflow from documents.search import get_backend from documents.search import reset_backend +from documents.tests.factories import DocumentFactory from documents.tests.utils import DirectoriesMixin from paperless_mail.models import MailAccount from paperless_mail.models import MailRule @@ -741,6 +743,49 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Tantivy rejects unparsable field queries with a 400 self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + @override_settings( + TIME_ZONE="UTC", + ) + @time_machine.travel( + datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC), + tick=False, + ) + def test_search_added_previous_quarter(self) -> None: + """ + GIVEN: + - Documents inside and outside the previous quarter + WHEN: + - Query with the legacy natural-language phrase used by the UI + THEN: + - Previous-quarter documents are returned + """ + d1 = DocumentFactory.create( + title="quarterly statement april", + content="bank statement", + added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC), + ) + d2 = DocumentFactory.create( + title="quarterly statement june", + content="bank statement", + added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC), + ) + d3 = DocumentFactory.create( + title="quarterly statement july", + content="bank statement", + added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC), + ) + + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) + + response = self.client.get('/api/documents/?query=added:"previous quarter"') + self.assertEqual(response.status_code, status.HTTP_200_OK) + + results = response.data["results"] + self.assertEqual({r["id"] for r in results}, {1, 2}) + 
@mock.patch("documents.search._backend.TantivyBackend.autocomplete") def test_search_autocomplete_limits(self, m) -> None: """