git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fixes how a language code like chi-sim is treated in the checks [2057/head]
author Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Sun, 27 Nov 2022 16:28:22 +0000 (08:28 -0800)
committer Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Sun, 27 Nov 2022 16:28:22 +0000 (08:28 -0800)
src/paperless_tesseract/checks.py

index 99780cad4660fe83f193cddb5f3c660f3899de83..c63761f31fd47b4dff8821574d2b5bc56e167f5a 100644 (file)
@@ -1,3 +1,4 @@
+import shutil
 import subprocess
 
 from django.conf import settings
@@ -7,10 +8,16 @@ from django.core.checks import Warning
 
 
 def get_tesseract_langs():
-    with subprocess.Popen(["tesseract", "--list-langs"], stdout=subprocess.PIPE) as p:
-        stdout, stderr = p.communicate()
+    proc = subprocess.run(
+        [shutil.which("tesseract"), "--list-langs"],
+        capture_output=True,
+    )
 
-    return stdout.decode().strip().split("\n")[1:]
+    # Decode bytes to string, split on newlines, trim out the header
+    proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
+
+    # Replace _ with - to convert two part languages to the expected code
+    return [x.replace("_", "-") for x in proc_lines]
 
 
 @register()