From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Sun, 4 Dec 2022 16:44:35 +0000 (-0800) Subject: Fixes language code checks around two part languages X-Git-Tag: v1.10.2~2^2~26 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=55ef0d4a1b62c3abe8500cad97ddeecf9f746b84;p=thirdparty%2Fpaperless-ngx.git Fixes language code checks around two part languages --- diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py index c63761f31f..ed5725d367 100644 --- a/src/paperless_tesseract/checks.py +++ b/src/paperless_tesseract/checks.py @@ -16,8 +16,7 @@ def get_tesseract_langs(): # Decode bytes to string, split on newlines, trim out the header proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:] - # Replace _ with - to convert two part languages to the expected code - return [x.replace("_", "-") for x in proc_lines] + return [x.strip() for x in proc_lines] @register() diff --git a/src/paperless_tesseract/tests/test_checks.py b/src/paperless_tesseract/tests/test_checks.py index cfac11d3c9..4d46ad9a39 100644 --- a/src/paperless_tesseract/tests/test_checks.py +++ b/src/paperless_tesseract/tests/test_checks.py @@ -27,3 +27,40 @@ class TestChecks(TestCase): msgs = check_default_language_available(None) self.assertEqual(len(msgs), 1) self.assertEqual(msgs[0].level, ERROR) + + @override_settings(OCR_LANGUAGE="chi_sim") + @mock.patch("paperless_tesseract.checks.get_tesseract_langs") + def test_multi_part_language(self, m): + """ + GIVEN: + - An OCR language which is multi part (ie chi-sim) + - The language is correctly formatted + WHEN: + - Installed packages are checked + THEN: + - No errors are reported + """ + m.return_value = ["chi_sim", "eng"] + + msgs = check_default_language_available(None) + + self.assertEqual(len(msgs), 0) + + @override_settings(OCR_LANGUAGE="chi-sim") + @mock.patch("paperless_tesseract.checks.get_tesseract_langs") + def test_multi_part_language_bad_format(self, 
m): + """ + GIVEN: + - An OCR language which is multi part (ie chi-sim) + - The language is NOT correctly formatted + WHEN: + - Installed packages are checked + THEN: + - An error is reported + """ + m.return_value = ["chi_sim", "eng"] + + msgs = check_default_language_available(None) + + self.assertEqual(len(msgs), 1) + self.assertEqual(msgs[0].level, ERROR)