]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
implement PAPERLESS_OCR_MAX_IMAGE_PIXELS
authorHenning Häcker <henning.haecker+github.com@gmail.com>
Sat, 19 Mar 2022 00:03:45 +0000 (01:03 +0100)
committerJohann Bauer <bauerj@bauerj.eu>
Wed, 30 Mar 2022 07:23:45 +0000 (09:23 +0200)
docs/configuration.rst
src/paperless_tesseract/parsers.py
src/paperless_text/parsers.py

index 82a14ae52b79a33bb41c5168fde2547925db6999..ab35b49d452f3004d9625cf2d24a1ca56e5a27d5 100644 (file)
@@ -389,6 +389,15 @@ PAPERLESS_OCR_IMAGE_DPI=<num>
     Default is none, which will automatically calculate image DPI so that
     the produced PDF documents are A4 sized.
 
+PAPERLESS_OCR_MAX_IMAGE_PIXELS=<num>
+    Paperless will not OCR images that have more pixels than this limit.
+    This is intended to prevent decompression bombs from overloading paperless.
+    Increase this limit if you encounter a DecompressionBombError even though
+    the file in question is not malicious; this can be caused, for example, by
+    incorrectly recognized metadata.
+    If you have enough resources or if you are certain that your uploaded files
+    are not malicious you can increase this value to your needs.
+    The default value is 256000000.
 
 PAPERLESS_OCR_USER_ARGS=<json>
     OCRmyPDF offers many more options. Use this parameter to specify any
index ad167ecf06d3db67343bdd5cb249843a97fadbf9..4065890eccae1968d390ba6a5eee6c2fb9a26fb3 100644 (file)
@@ -8,6 +8,7 @@ from documents.parsers import make_thumbnail_from_pdf
 from documents.parsers import ParseError
 from PIL import Image
 
+Image.MAX_IMAGE_PIXELS = int(os.environ.get('PAPERLESS_OCR_MAX_IMAGE_PIXELS', Image.MAX_IMAGE_PIXELS))  # env values are str; the pixel limit must be an int
 
 class NoTextFoundException(Exception):
     pass
index a0f19c020d03243956a0bec204eaeec3bef09dec..e41e25e762a765380b593a4f9ee9fe4cc3ff5c88 100644 (file)
@@ -6,6 +6,7 @@ from PIL import Image
 from PIL import ImageDraw
 from PIL import ImageFont
 
+Image.MAX_IMAGE_PIXELS = int(os.environ.get('PAPERLESS_OCR_MAX_IMAGE_PIXELS', Image.MAX_IMAGE_PIXELS))  # env values are str; the pixel limit must be an int
 
 class TextDocumentParser(DocumentParser):
     """