Feature: Allow user to control PIL image pixel limit (#5997)

author Trenton H <797416+stumpylog@users.noreply.github.com>

Tue, 5 Mar 2024 00:19:56 +0000 (16:19 -0800)

committer GitHub <noreply@github.com>

Tue, 5 Mar 2024 00:19:56 +0000 (00:19 +0000)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Tue, 5 Mar 2024 00:19:56 +0000 (16:19 -0800)
committer GitHub <noreply@github.com>
Tue, 5 Mar 2024 00:19:56 +0000 (00:19 +0000)
diff --git a/docs/configuration.md b/docs/configuration.md

index c7b710c66c2a7b101c7737e6161ce3959cadfa83..b1e882845205a351fbe401365a1619c4f54ec860 100644 (file)
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -969,6 +969,20 @@ be used with caution!
  
      Defaults to None, which does not add any additional apps.
  
+#### [`PAPERLESS_MAX_IMAGE_PIXELS=<number>`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS}
+
+: Configures the maximum size of an image PIL will allow to load without warning or error.
+
+: If unset, will default to the value determined by
+[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
+
+    Defaults to None, which does not change the limit.
+
+    !!! warning
+
+        This limit is designed to prevent denial of service from malicious files.
+        It should only be raised or disabled in certain circumstances and with great care.
+
  ## Document Consumption {#consume_config}
  
  #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py

index e68ba4f8c8ba23ba4388c9902521f0bf041ec71d..e77b35fb396c21d30cb41121870e0a704ffb264f 100644 (file)
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
  from documents.plugins.helpers import ProgressStatusOptions
  from documents.utils import copy_basic_file_stats
  from documents.utils import copy_file_with_basic_stats
+from documents.utils import maybe_override_pixel_limit
  
  logger = logging.getLogger("paperless.barcodes")
  
@@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
          self.barcodes: list[Barcode] = []
  
      def run(self) -> Optional[str]:
+        # Some operations may use PIL, override pixel setting if needed
+        maybe_override_pixel_limit()
+
          # Maybe do the conversion of TIFF to PDF
          self.convert_from_tiff_to_pdf()
  
diff --git a/src/documents/converters.py b/src/documents/converters.py

index e3a7cb78676b9b4b8ede1d85e94888ba3967b14e..5c5ba1e078b10d60055eef37a2cf5bf9e67af775 100644 (file)
--- a/src/documents/converters.py
+++ b/src/documents/converters.py
@@ -6,6 +6,7 @@ from django.conf import settings
  from PIL import Image
  
  from documents.utils import copy_basic_file_stats
+from documents.utils import maybe_override_pixel_limit
  
  
  def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
  
      Returns the path of the PDF created.
      """
+    # override pixel setting if needed
+    maybe_override_pixel_limit()
+
      with Image.open(tiff_path) as im:
          has_alpha_layer = im.mode in ("RGBA", "LA")
      if has_alpha_layer:
diff --git a/src/documents/utils.py b/src/documents/utils.py

index b84c9b53c0eeef5926e5a3e81af5cd24b5a5aa48..29f4de14df3185a908e591e2b10084ffe2ccab9d 100644 (file)
--- a/src/documents/utils.py
+++ b/src/documents/utils.py
@@ -1,8 +1,12 @@
  import shutil
  from os import utime
  from pathlib import Path
+from typing import Optional
  from typing import Union
  
+from django.conf import settings
+from PIL import Image
+
  
  def _coerce_to_path(
      source: Union[Path, str],
@@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
  
      shutil.copy(source, dest)
      copy_basic_file_stats(source, dest)
+
+
+def maybe_override_pixel_limit() -> None:
+    """
+    Maybe overrides the PIL limit on pixel count, if configured to allow it
+    """
+    limit: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
+    if limit is not None and limit >= 0:
+        pixel_count = limit
+        if pixel_count == 0:
+            pixel_count = None
+        Image.MAX_IMAGE_PIXELS = pixel_count
diff --git a/src/paperless/settings.py b/src/paperless/settings.py

index 1c61132732d392ca1c3633358540636d6122e7fd..77adb6bbff8722753c10faeb725b2bad6c3f3ddb 100644 (file)
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
  
  OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
  
+MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+    "PAPERLESS_MAX_IMAGE_PIXELS",
+)
+
  # GNUPG needs a home directory for some reason
  GNUPG_HOME = os.getenv("HOME", "/tmp")
  
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py

index 020922703e3aedf3fa29d7a63f24d9e2726d0eaa..c483a3da4d637516f63a3cd5240ae096a6a40f41 100644 (file)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -12,6 +12,7 @@ from PIL import Image
  from documents.parsers import DocumentParser
  from documents.parsers import ParseError
  from documents.parsers import make_thumbnail_from_pdf
+from documents.utils import maybe_override_pixel_limit
  from paperless.config import OcrConfig
  from paperless.models import ArchiveFileChoices
  from paperless.models import CleanChoices
@@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
              ocrmypdf_args["sidecar"] = sidecar_file
  
          if self.is_image(mime_type):
+            # This may be required, depending on the known information
+            maybe_override_pixel_limit()
+
              dpi = self.get_dpi(input_file)
              a4_dpi = self.calculate_a4_dpi(input_file)
  
diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py

index f64cb69f057f68ac7138cf42f6c9cd07b9e6754e..fae64742ec0c042d8f6f46674da1949dd12a5e5f 100644 (file)
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
  
          self.assertRaises(ParseError, f)
  
-    @override_settings(OCR_IMAGE_DPI=72)
+    @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
      def test_image_no_dpi_default(self):
          parser = RasterisedDocumentParser(None)
author	Trenton H <797416+stumpylog@users.noreply.github.com>
	Tue, 5 Mar 2024 00:19:56 +0000 (16:19 -0800)
committer	GitHub <noreply@github.com>
	Tue, 5 Mar 2024 00:19:56 +0000 (00:19 +0000)
docs/configuration.md		patch \| blob \| blame \| history
src/documents/barcodes.py		patch \| blob \| blame \| history
src/documents/converters.py		patch \| blob \| blame \| history
src/documents/utils.py		patch \| blob \| blame \| history
src/paperless/settings.py		patch \| blob \| blame \| history
src/paperless_tesseract/parsers.py		patch \| blob \| blame \| history
src/paperless_tesseract/tests/test_parser.py		patch \| blob \| blame \| history