]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Feature: Allow user to control PIL image pixel limit (#5997)
authorTrenton H <797416+stumpylog@users.noreply.github.com>
Tue, 5 Mar 2024 00:19:56 +0000 (16:19 -0800)
committerGitHub <noreply@github.com>
Tue, 5 Mar 2024 00:19:56 +0000 (00:19 +0000)
docs/configuration.md
src/documents/barcodes.py
src/documents/converters.py
src/documents/utils.py
src/paperless/settings.py
src/paperless_tesseract/parsers.py
src/paperless_tesseract/tests/test_parser.py

index c7b710c66c2a7b101c7737e6161ce3959cadfa83..b1e882845205a351fbe401365a1619c4f54ec860 100644 (file)
@@ -969,6 +969,20 @@ be used with caution!
 
     Defaults to None, which does not add any additional apps.
 
+#### [`PAPERLESS_MAX_IMAGE_PIXELS=<number>`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS}
+
+: Configures the maximum size of an image PIL will allow to load without warning or error.
+
+: If unset, will default to the value determined by
+[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
+
+    Defaults to None, which does not change the limit.
+
+    !!! warning
+
+        This limit is designed to prevent denial of service from malicious files.
+        It should only be raised or disabled in certain circumstances and with great care.
+
 ## Document Consumption {#consume_config}
 
 #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
index e68ba4f8c8ba23ba4388c9902521f0bf041ec71d..e77b35fb396c21d30cb41121870e0a704ffb264f 100644 (file)
@@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
 from documents.plugins.helpers import ProgressStatusOptions
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
+from documents.utils import maybe_override_pixel_limit
 
 logger = logging.getLogger("paperless.barcodes")
 
@@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
         self.barcodes: list[Barcode] = []
 
     def run(self) -> Optional[str]:
+        # Some operations may use PIL, override pixel setting if needed
+        maybe_override_pixel_limit()
+
         # Maybe do the conversion of TIFF to PDF
         self.convert_from_tiff_to_pdf()
 
index e3a7cb78676b9b4b8ede1d85e94888ba3967b14e..5c5ba1e078b10d60055eef37a2cf5bf9e67af775 100644 (file)
@@ -6,6 +6,7 @@ from django.conf import settings
 from PIL import Image
 
 from documents.utils import copy_basic_file_stats
+from documents.utils import maybe_override_pixel_limit
 
 
 def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
 
     Returns the path of the PDF created.
     """
+    # override pixel setting if needed
+    maybe_override_pixel_limit()
+
     with Image.open(tiff_path) as im:
         has_alpha_layer = im.mode in ("RGBA", "LA")
     if has_alpha_layer:
index b84c9b53c0eeef5926e5a3e81af5cd24b5a5aa48..29f4de14df3185a908e591e2b10084ffe2ccab9d 100644 (file)
@@ -1,8 +1,12 @@
 import shutil
 from os import utime
 from pathlib import Path
+from typing import Optional
 from typing import Union
 
+from django.conf import settings
+from PIL import Image
+
 
 def _coerce_to_path(
     source: Union[Path, str],
@@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
 
     shutil.copy(source, dest)
     copy_basic_file_stats(source, dest)
+
+
+def maybe_override_pixel_limit() -> None:
+    """
+    Maybe overrides the PIL limit on pixel count, if configured to allow it
+    """
+    limit: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
+    if limit is not None and limit >= 0:
+        pixel_count = limit
+        if pixel_count == 0:
+            pixel_count = None
+        Image.MAX_IMAGE_PIXELS = pixel_count
index 1c61132732d392ca1c3633358540636d6122e7fd..77adb6bbff8722753c10faeb725b2bad6c3f3ddb 100644 (file)
@@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
 
 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
 
+MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+    "PAPERLESS_MAX_IMAGE_PIXELS",
+)
+
 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")
 
index 020922703e3aedf3fa29d7a63f24d9e2726d0eaa..c483a3da4d637516f63a3cd5240ae096a6a40f41 100644 (file)
@@ -12,6 +12,7 @@ from PIL import Image
 from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
+from documents.utils import maybe_override_pixel_limit
 from paperless.config import OcrConfig
 from paperless.models import ArchiveFileChoices
 from paperless.models import CleanChoices
@@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
             ocrmypdf_args["sidecar"] = sidecar_file
 
         if self.is_image(mime_type):
+            # This may be required, depending on the known information
+            maybe_override_pixel_limit()
+
             dpi = self.get_dpi(input_file)
             a4_dpi = self.calculate_a4_dpi(input_file)
 
index f64cb69f057f68ac7138cf42f6c9cd07b9e6754e..fae64742ec0c042d8f6f46674da1949dd12a5e5f 100644 (file)
@@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 
         self.assertRaises(ParseError, f)
 
-    @override_settings(OCR_IMAGE_DPI=72)
+    @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
     def test_image_no_dpi_default(self):
         parser = RasterisedDocumentParser(None)