postgresql-client \
# For Numpy
libatlas3-base \
- # thumbnail size reduction
- pngquant \
# OCRmyPDF dependencies
tesseract-ocr \
tesseract-ocr-eng \
.. code::
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
-
-Managing thumbnail format
-===================
-
-Document thumbnails were originally created as PNG format. Newly
-uploaded documents are now using WebP to reduce both storage space and
-page loading times. To convert older PNG format thumbnails to WebP
-run:
-
-.. code::
-
- convert_thumbnails
# this is here so that django finds the checks.
from .checks import changed_password_check
from .checks import parser_check
-from .checks import png_thumbnail_check
-__all__ = ["changed_password_check", "parser_check", "png_thumbnail_check"]
+__all__ = ["changed_password_check", "parser_check"]
import textwrap
-from pathlib import Path
from django.conf import settings
from django.core.checks import Error
-from django.core.checks import Info
from django.core.checks import register
from django.core.exceptions import FieldError
from django.db.utils import OperationalError
]
else:
return []
-
-
-@register()
-def png_thumbnail_check(app_configs, **kwargs):
- from documents.models import Document
-
- try:
- documents = Document.objects.all()
- for document in documents:
- existing_thumbnail = Path(document.thumbnail_path).resolve()
- if existing_thumbnail.suffix == ".png":
- return [
- Info(
- "PNG thumbnails found, consider running convert_thumbnails "
- "to convert to WebP",
- ),
- ]
- return []
- except (OperationalError, ProgrammingError, FieldError):
- return [] # No documents table yet
original_target = os.path.join(self.target, original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
- thumbnail_name = base_name + "-thumbnail.png"
+ thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
import logging
import multiprocessing
import shutil
-from pathlib import Path
import tqdm
from django import db
try:
- existing_thumbnail = Path(document.thumbnail_path).resolve()
-
- # Remove an existing PNG format thumbnail, if it existed
- if existing_thumbnail.exists() and existing_thumbnail.suffix == ".png":
- existing_thumbnail.unlink()
-
thumb = parser.get_thumbnail(
document.source_path,
document.mime_type,
)
with open(
- os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.png"),
+ os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
"wb",
) as f:
f.write(content_thumbnail)
"samples",
"documents",
"thumbnails",
- "0000001.png",
+ "0000001.webp",
)
archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
def __init__(self, logging_group, scratch_dir, archive_path):
super().__init__(logging_group, None)
- _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+ _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
self.archive_path = archive_path
def get_thumbnail(self, document_path, mime_type, file_name=None):
def __init__(self, logging_group, progress_callback=None):
super().__init__(logging_group, progress_callback)
- _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
+ _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None):
self.text = "The text"
def __init__(self, logging_group, scratch_dir):
super().__init__(logging_group)
- _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+ _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
return "image/png"
+ elif os.path.splitext(file)[1] == ".webp":
+ return "image/webp"
else:
return "unknown"
else:
"samples",
"documents",
"thumbnails",
- f"0000004.png.gpg",
+ f"0000004.webp.gpg",
),
- os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
+ os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
)
call_command("decrypt_documents")
"samples",
"documents",
"thumbnails",
- "0000001.png",
+ "0000001.webp",
),
- os.path.join(self.dirs.thumbnail_dir, "0000001.png"),
+ os.path.join(self.dirs.thumbnail_dir, "0000001.webp"),
)
return Document.objects.create(
handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails
# if available
- thumbnail_path = doc.thumbnail_path
- if os.path.splitext(thumbnail_path)[1] == ".webp":
- content_type = "image/webp"
- else:
- content_type = "image/png"
+ content_type = "image/webp"
return HttpResponse(handle, content_type=content_type)
except (FileNotFoundError, Document.DoesNotExist):