git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fixes a corrupted index not being handled by whoosh.create_in (#5950)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Thu, 29 Feb 2024 00:07:17 +0000 (16:07 -0800)
committer GitHub <noreply@github.com>
Thu, 29 Feb 2024 00:07:17 +0000 (00:07 +0000)
src/documents/index.py
src/documents/tests/test_matchables.py

index de651c13d885e7f04b345f2092a0c43f259db69e..b787c76355157864e7d6bece31d8bc734ec90113 100644 (file)
@@ -5,6 +5,7 @@ from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
 from datetime import timezone
+from shutil import rmtree
 from typing import Optional
 
 from dateutil.parser import isoparse
@@ -36,7 +37,6 @@ from whoosh.searching import Searcher
 from whoosh.util.times import timespan
 from whoosh.writing import AsyncWriter
 
-# from documents.models import CustomMetadata
 from documents.models import CustomFieldInstance
 from documents.models import Document
 from documents.models import Note
@@ -87,8 +87,11 @@ def open_index(recreate=False) -> FileIndex:
     except Exception:
         logger.exception("Error while opening the index, recreating.")
 
-    if not os.path.isdir(settings.INDEX_DIR):
-        settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
+    # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
+    if os.path.isdir(settings.INDEX_DIR):
+        rmtree(settings.INDEX_DIR)
+    settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
+
     return create_in(settings.INDEX_DIR, get_schema())
 
 
index 34bdffe9500ae599783a75481d31c685317852c8..4626e2c080a56e960bf030191ac6ea3562a564d2 100644 (file)
@@ -1,6 +1,7 @@
 import shutil
 import tempfile
 from collections.abc import Iterable
+from pathlib import Path
 from random import randint
 
 from django.contrib.admin.models import LogEntry
@@ -396,7 +397,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
             mime_type="application/pdf",
         )
 
-        self.index_dir = tempfile.mkdtemp()
+        self.index_dir = Path(tempfile.mkdtemp())
         # TODO: we should not need the index here.
         override_settings(INDEX_DIR=self.index_dir).enable()