From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Sun, 8 Sep 2024 19:17:32 +0000 (-0700)
Subject: Fix: Rework system check so it won't crash if tesseract is not found (#7640)
X-Git-Tag: v2.12.0~1^2~3
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3df8be0bc70ccf05bd8bfe57ed8d5c8854b80c2e;p=thirdparty%2Fpaperless-ngx.git

Fix: Rework system check so it won't crash if tesseract is not found (#7640)
---

diff --git a/src/paperless/checks.py b/src/paperless/checks.py
index 4ba322666b..150fcb2011 100644
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py
@@ -78,7 +78,7 @@ def binaries_check(app_configs, **kwargs):
     error = "Paperless can't find {}. Without it, consumption is impossible."
     hint = "Either it's not in your ${PATH} or it's not installed."
 
-    binaries = (settings.CONVERT_BINARY, "tesseract")
+    binaries = (settings.CONVERT_BINARY, "tesseract", "gs")
 
     check_messages = []
     for binary in binaries:
diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py
index 82d2550051..0d7a1d90d2 100644
--- a/src/paperless_tesseract/checks.py
+++ b/src/paperless_tesseract/checks.py
@@ -21,26 +21,32 @@ def get_tesseract_langs():
 
 @register()
 def check_default_language_available(app_configs, **kwargs):
-    installed_langs = get_tesseract_langs()
+    errs = []
 
     if not settings.OCR_LANGUAGE:
-        return [
+        errs.append(
             Warning(
                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
                 "This means that tesseract will fallback to english.",
             ),
-        ]
-
-    specified_langs = settings.OCR_LANGUAGE.split("+")
-
-    for lang in specified_langs:
-        if lang not in installed_langs:
-            return [
-                Error(
-                    f"The selected ocr language {lang} is "
-                    f"not installed. Paperless cannot OCR your documents "
-                    f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
-                ),
-            ]
-
-    return []
+        )
+        return errs
+
+    # binaries_check in paperless will check and report if this doesn't exist
+    # So skip trying to do anything here and let that handle missing binaries
+    if shutil.which("tesseract") is not None:
+        installed_langs = get_tesseract_langs()
+
+        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
+
+        for lang in specified_langs:
+            if lang not in installed_langs:
+                errs.append(
+                    Error(
+                        f"The selected ocr language {lang} is "
+                        f"not installed. Paperless cannot OCR your documents "
+                        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+                    ),
+                )
+
+    return errs