From: Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Date: Sun, 27 Nov 2022 16:28:22 +0000 (-0800)
Subject: Fixes how a language code like chi-sim is treated in the checks
X-Git-Tag: v1.10.1~1^2~15^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F2057%2Fhead;p=thirdparty%2Fpaperless-ngx.git

Fixes how a language code like chi-sim is treated in the checks
---

diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py
index 99780cad46..c63761f31f 100644
--- a/src/paperless_tesseract/checks.py
+++ b/src/paperless_tesseract/checks.py
@@ -1,3 +1,4 @@
+import shutil
 import subprocess
 
 from django.conf import settings
@@ -7,10 +8,16 @@ from django.core.checks import Warning
 
 
 def get_tesseract_langs():
-    with subprocess.Popen(["tesseract", "--list-langs"], stdout=subprocess.PIPE) as p:
-        stdout, stderr = p.communicate()
+    proc = subprocess.run(
+        [shutil.which("tesseract"), "--list-langs"],
+        capture_output=True,
+    )
 
-    return stdout.decode().strip().split("\n")[1:]
+    # Decode bytes to string, split on newlines, trim out the header
+    proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
+
+    # Replace _ with - to convert two part languages to the expected code
+    return [x.replace("_", "-") for x in proc_lines]
 
 
 @register()