git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Adds new setting to control color conversions (#4709)
author: Trenton H <797416+stumpylog@users.noreply.github.com>
Wed, 29 Nov 2023 20:18:44 +0000 (12:18 -0800)
committer: GitHub <noreply@github.com>
Wed, 29 Nov 2023 20:18:44 +0000 (12:18 -0800)
docs/configuration.md
src/paperless/settings.py
src/paperless_tesseract/parsers.py

index e952ec41bba4b855bbe885a287d7c97902eac844..c0e8022ac3148b50047ecaee975ff853c11669ab 100644 (file)
@@ -704,6 +704,20 @@ but could result in missing text content.
         this value if you are certain your documents are not malicious and
         you need the text which was not OCRed
 
+#### [`PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY=<RGB>`](#PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY) {#PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY}
+
+: Controls the Ghostscript color conversion strategy when creating the archive file. This setting
+will only be utilized if the output is a version of PDF/A.
+
+    Valid options are CMYK, Gray, LeaveColorUnchanged, RGB or UseDeviceIndependentColor.
+
+    You can find more on the settings [here](https://ghostscript.readthedocs.io/en/latest/VectorDevices.html#color-conversion-and-management) in the Ghostscript documentation.
+
+    !!! warning
+
+        Utilizing some of the options may result in errors when creating archive
+        files from PDFs.
+
 #### [`PAPERLESS_OCR_USER_ARGS=<json>`](#PAPERLESS_OCR_USER_ARGS) {#PAPERLESS_OCR_USER_ARGS}
 
 : OCRmyPDF offers many more options. Use this parameter to specify any
index 86f1f569fb82c667cc0f9f0c9f38c8ce4e8e23ba..9daeb8a47a3ad613db5e6d687ffabfae4c08558d 100644 (file)
@@ -864,6 +864,11 @@ OCR_MAX_IMAGE_PIXELS: Optional[int] = None
 if os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS") is not None:
     OCR_MAX_IMAGE_PIXELS: int = int(os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS"))
 
+OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
+    "PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY",
+    "RGB",
+)
+
 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
 
 # GNUPG needs a home directory for some reason
index 3523da7bde4a482abad0f76aaa03f1d664e2ea63..babcf6bcfd360d15e86ac966e0d550ea22057602 100644 (file)
@@ -186,6 +186,11 @@ class RasterisedDocumentParser(DocumentParser):
             "progress_bar": False,
         }
 
+        if "pdfa" in ocrmypdf_args["output_type"]:
+            ocrmypdf_args[
+                "color_conversion_strategy"
+            ] = settings.OCR_COLOR_CONVERSION_STRATEGY
+
         if settings.OCR_MODE == "force" or safe_fallback:
             ocrmypdf_args["force_ocr"] = True
         elif settings.OCR_MODE in ["skip", "skip_noarchive"]: