]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fixes language code checks around two part languages
author Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Sun, 4 Dec 2022 16:44:35 +0000 (08:44 -0800)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Sun, 4 Dec 2022 20:23:12 +0000 (12:23 -0800)
src/paperless_tesseract/checks.py
src/paperless_tesseract/tests/test_checks.py

index c63761f31fd47b4dff8821574d2b5bc56e167f5a..ed5725d367cfbba642bc2a72f4360a719ac5c81f 100644 (file)
@@ -16,8 +16,7 @@ def get_tesseract_langs():
     # Decode bytes to string, split on newlines, trim out the header
     proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
 
-    # Replace _ with - to convert two part languages to the expected code
-    return [x.replace("_", "-") for x in proc_lines]
+    return [x.strip() for x in proc_lines]
 
 
 @register()
index cfac11d3c93888f10b2b81b7bdd2c96eef13fa94..4d46ad9a398260ebb687823baa3cc9cf344a977f 100644 (file)
@@ -27,3 +27,40 @@ class TestChecks(TestCase):
         msgs = check_default_language_available(None)
         self.assertEqual(len(msgs), 1)
         self.assertEqual(msgs[0].level, ERROR)
+
+    @override_settings(OCR_LANGUAGE="chi_sim")
+    @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
+    def test_multi_part_language(self, m):
+        """
+        GIVEN:
+            - An OCR language which is multi part (e.g. chi_sim)
+            - The language is correctly formatted
+        WHEN:
+            - Installed packages are checked
+        THEN:
+            - No errors are reported
+        """
+        m.return_value = ["chi_sim", "eng"]
+
+        msgs = check_default_language_available(None)
+
+        self.assertEqual(len(msgs), 0)
+
+    @override_settings(OCR_LANGUAGE="chi-sim")
+    @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
+    def test_multi_part_language_bad_format(self, m):
+        """
+        GIVEN:
+            - An OCR language which is multi part (e.g. chi-sim)
+            - The language is NOT correctly formatted (hyphen instead of underscore)
+        WHEN:
+            - Installed packages are checked
+        THEN:
+            - An error is reported
+        """
+        m.return_value = ["chi_sim", "eng"]
+
+        msgs = check_default_language_available(None)
+
+        self.assertEqual(len(msgs), 1)
+        self.assertEqual(msgs[0].level, ERROR)