# Decode bytes to string, split on newlines, trim out the header
proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
- # Replace _ with - to convert two part languages to the expected code
- return [x.replace("_", "-") for x in proc_lines]
+ return [x.strip() for x in proc_lines]
@register()
msgs = check_default_language_available(None)
self.assertEqual(len(msgs), 1)
self.assertEqual(msgs[0].level, ERROR)
+
+ @override_settings(OCR_LANGUAGE="chi_sim")
+ @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
+ def test_multi_part_language(self, m):
+ """
+ GIVEN:
+ - An OCR language which is multi part (i.e. chi_sim)
+ - The language is correctly formatted
+ WHEN:
+ - Installed packages are checked
+ THEN:
+ - No errors are reported
+ """
+ m.return_value = ["chi_sim", "eng"]
+
+ msgs = check_default_language_available(None)
+
+ self.assertEqual(len(msgs), 0)
+
+ @override_settings(OCR_LANGUAGE="chi-sim")
+ @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
+ def test_multi_part_language_bad_format(self, m):
+ """
+ GIVEN:
+ - An OCR language which is multi part (ie chi-sim)
+ - The language is NOT correctly formatted
+ WHEN:
+ - Installed packages are checked
+ THEN:
+ - An error is reported
+ """
+ m.return_value = ["chi_sim", "eng"]
+
+ msgs = check_default_language_available(None)
+
+ self.assertEqual(len(msgs), 1)
+ self.assertEqual(msgs[0].level, ERROR)