git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: Rework system check so it won't crash if tesseract is not found (#7640)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Sun, 8 Sep 2024 19:17:32 +0000 (12:17 -0700)
committer GitHub <noreply@github.com>
Sun, 8 Sep 2024 19:17:32 +0000 (12:17 -0700)
src/paperless/checks.py
src/paperless_tesseract/checks.py

diff --git a/src/paperless/checks.py b/src/paperless/checks.py
index 4ba322666b43324c63060ea7a9b6fd5f787ebab0..150fcb201125d8d3b9a23e925e7728bd29725fa9 100644 (file)
@@ -78,7 +78,7 @@ def binaries_check(app_configs, **kwargs):
     error = "Paperless can't find {}. Without it, consumption is impossible."
     hint = "Either it's not in your ${PATH} or it's not installed."
 
-    binaries = (settings.CONVERT_BINARY, "tesseract")
+    binaries = (settings.CONVERT_BINARY, "tesseract", "gs")
 
     check_messages = []
     for binary in binaries:
diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py
index 82d2550051f70a6cd7b65b4598c3295cf22fd133..0d7a1d90d26ed5759b7d2b1801e055d87b43e665 100644 (file)
@@ -21,26 +21,32 @@ def get_tesseract_langs():
 
 @register()
 def check_default_language_available(app_configs, **kwargs):
-    installed_langs = get_tesseract_langs()
+    errs = []
 
     if not settings.OCR_LANGUAGE:
-        return [
+        errs.append(
             Warning(
                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
                 "This means that tesseract will fallback to english.",
             ),
-        ]
-
-    specified_langs = settings.OCR_LANGUAGE.split("+")
-
-    for lang in specified_langs:
-        if lang not in installed_langs:
-            return [
-                Error(
-                    f"The selected ocr language {lang} is "
-                    f"not installed. Paperless cannot OCR your documents "
-                    f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
-                ),
-            ]
-
-    return []
+        )
+        return errs
+
+    # binaries_check in paperless will check and report if this doesn't exist
+    # So skip trying to do anything here and let that handle missing binaries
+    if shutil.which("tesseract") is not None:
+        installed_langs = get_tesseract_langs()
+
+        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
+
+        for lang in specified_langs:
+            if lang not in installed_langs:
+                errs.append(
+                    Error(
+                        f"The selected ocr language {lang} is "
+                        f"not installed. Paperless cannot OCR your documents "
+                        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+                    ),
+                )
+
+    return errs