git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Replaces two sentinel files with .index_settings.json which can properly store multip...
author Trenton H <797416+stumpylog@users.noreply.github.com>
Thu, 23 Apr 2026 02:38:26 +0000 (19:38 -0700)
committer GitHub <noreply@github.com>
Thu, 23 Apr 2026 02:38:26 +0000 (02:38 +0000)
src/documents/search/_schema.py
src/documents/tests/search/test_schema.py

index 479c60bc56d55274cea33d1a901341c2cb0a450b..bc0832d37c33fd5821e43c8f0c6f7d02a1c852b5 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import logging
 import shutil
 from typing import TYPE_CHECKING
@@ -100,9 +101,9 @@ def needs_rebuild(index_dir: Path) -> bool:
     """
     Check if the search index needs rebuilding.
 
-    Compares the current schema version and search language configuration
-    against sentinel files to determine if the index is compatible with
-    the current paperless-ngx version and settings.
+    Reads .index_settings.json to compare the stored schema version and
+    search language against the current configuration. Returns True if the
+    file is missing, unparsable, or either value mismatches.
 
     Args:
         index_dir: Path to the search index directory
@@ -110,24 +111,19 @@ def needs_rebuild(index_dir: Path) -> bool:
     Returns:
         True if the index needs rebuilding, False if it's up to date
     """
-    version_file = index_dir / ".schema_version"
-    if not version_file.exists():
+    settings_file = index_dir / ".index_settings.json"
+    if not settings_file.exists():
         return True
     try:
-        if int(version_file.read_text().strip()) != SCHEMA_VERSION:
+        data = json.loads(settings_file.read_text())
+        if data.get("schema_version") != SCHEMA_VERSION:
             logger.info("Search index schema version mismatch - rebuilding.")
             return True
+        if "language" not in data or data["language"] != settings.SEARCH_LANGUAGE:
+            logger.info("Search index language changed - rebuilding.")
+            return True
     except ValueError:
         return True
-
-    language_file = index_dir / ".schema_language"
-    if not language_file.exists():
-        logger.info("Search index language sentinel missing - rebuilding.")
-        return True
-    if language_file.read_text().strip() != (settings.SEARCH_LANGUAGE or ""):
-        logger.info("Search index language changed - rebuilding.")
-        return True
-
     return False
 
 
@@ -149,9 +145,16 @@ def wipe_index(index_dir: Path) -> None:
 
 
 def _write_sentinels(index_dir: Path) -> None:
-    """Write schema version and language sentinel files so the next index open can skip rebuilding."""
-    (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
-    (index_dir / ".schema_language").write_text(settings.SEARCH_LANGUAGE or "")
+    """Write .index_settings.json so the next index open can skip rebuilding."""
+    settings_file = index_dir / ".index_settings.json"
+    settings_file.write_text(
+        json.dumps(
+            {
+                "schema_version": SCHEMA_VERSION,
+                "language": settings.SEARCH_LANGUAGE,
+            },
+        ),
+    )
 
 
 def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
index 1ff9bee32769dc3f0ec635df20b3aff230a27db8..7219df5806d5f8c7ccda52c5681ee448110b62f1 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 from typing import TYPE_CHECKING
 
 import pytest
@@ -18,7 +19,7 @@ pytestmark = pytest.mark.search
 class TestNeedsRebuild:
     """needs_rebuild covers all sentinel-file states that require a full reindex."""
 
-    def test_returns_true_when_version_file_missing(self, index_dir: Path) -> None:
+    def test_returns_true_when_settings_file_missing(self, index_dir: Path) -> None:
         assert needs_rebuild(index_dir) is True
 
     def test_returns_false_when_version_and_language_match(
@@ -27,37 +28,51 @@ class TestNeedsRebuild:
         settings: SettingsWrapper,
     ) -> None:
         settings.SEARCH_LANGUAGE = "en"
-        (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
-        (index_dir / ".schema_language").write_text("en")
+        (index_dir / ".index_settings.json").write_text(
+            json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}),
+        )
         assert needs_rebuild(index_dir) is False
 
-    def test_returns_true_on_schema_version_mismatch(self, index_dir: Path) -> None:
-        (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION - 1))
+    def test_returns_true_on_schema_version_mismatch(
+        self,
+        index_dir: Path,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.SEARCH_LANGUAGE = None
+        (index_dir / ".index_settings.json").write_text(
+            json.dumps({"schema_version": SCHEMA_VERSION - 1, "language": None}),
+        )
         assert needs_rebuild(index_dir) is True
 
-    def test_returns_true_when_version_file_not_an_integer(
+    def test_returns_true_when_version_is_not_an_integer(
         self,
         index_dir: Path,
+        settings: SettingsWrapper,
     ) -> None:
-        (index_dir / ".schema_version").write_text("not-a-number")
+        settings.SEARCH_LANGUAGE = None
+        (index_dir / ".index_settings.json").write_text(
+            json.dumps({"schema_version": "not-a-number", "language": None}),
+        )
         assert needs_rebuild(index_dir) is True
 
-    def test_returns_true_when_language_sentinel_missing(
+    def test_returns_true_when_language_key_missing(
         self,
         index_dir: Path,
         settings: SettingsWrapper,
     ) -> None:
         settings.SEARCH_LANGUAGE = "en"
-        (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
-        # .schema_language intentionally absent
+        (index_dir / ".index_settings.json").write_text(
+            json.dumps({"schema_version": SCHEMA_VERSION}),
+        )
         assert needs_rebuild(index_dir) is True
 
-    def test_returns_true_when_language_sentinel_content_differs(
+    def test_returns_true_when_language_differs(
         self,
         index_dir: Path,
         settings: SettingsWrapper,
     ) -> None:
         settings.SEARCH_LANGUAGE = "de"
-        (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
-        (index_dir / ".schema_language").write_text("en")
+        (index_dir / ".index_settings.json").write_text(
+            json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}),
+        )
         assert needs_rebuild(index_dir) is True