set -eu
-for command in convert_thumbnails \
- decrypt_documents \
+for command in decrypt_documents \
document_archiver \
document_exporter \
document_importer \
+++ /dev/null
-import logging
-import multiprocessing.pool
-import shutil
-import tempfile
-import time
-from pathlib import Path
-
-from django.core.management.base import BaseCommand
-from documents.models import Document
-from documents.parsers import run_convert
-
-logger = logging.getLogger("paperless.management.convert_thumbnails")
-
-
-def _do_convert(work_package):
- _, existing_thumbnail, converted_thumbnail = work_package
- try:
-
- logger.info(f"Converting thumbnail: {existing_thumbnail}")
-
- # Run actual conversion
- run_convert(
- density=300,
- scale="500x5000>",
- alpha="remove",
- strip=True,
- trim=False,
- auto_orient=True,
- input_file=f"{existing_thumbnail}[0]",
- output_file=str(converted_thumbnail),
- )
-
- # Copy newly created thumbnail to thumbnail directory
- shutil.copy(converted_thumbnail, existing_thumbnail.parent)
-
- # Remove the PNG version
- existing_thumbnail.unlink()
-
- logger.info(
- "Conversion to WebP completed, "
- f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
- )
-
- except Exception as e:
- logger.error(
- f"Error converting thumbnail" f" (existing file unchanged): {e}",
- )
-
-
-class Command(BaseCommand):
-
- help = """
- Converts existing PNG thumbnails into
- WebP format.
- """.replace(
- " ",
- "",
- )
-
- def handle(self, *args, **options):
-
- logger.info("Converting all PNG thumbnails to WebP")
- start = time.time()
- documents = Document.objects.all()
-
- with tempfile.TemporaryDirectory() as tempdir:
-
- work_packages = []
-
- for document in documents:
- existing_thumbnail = Path(document.thumbnail_path).resolve()
-
- if existing_thumbnail.suffix == ".png":
-
- # Change the existing filename suffix from png to webp
- converted_thumbnail_name = existing_thumbnail.with_suffix(
- ".webp",
- ).name
-
- # Create the expected output filename in the tempdir
- converted_thumbnail = (
- Path(tempdir) / Path(converted_thumbnail_name)
- ).resolve()
-
- # Package up the necessary info
- work_packages.append(
- (document, existing_thumbnail, converted_thumbnail),
- )
-
- if len(work_packages):
- with multiprocessing.pool.Pool(processes=4, maxtasksperchild=4) as pool:
- pool.map(_do_convert, work_packages)
-
- end = time.time()
- duration = end - start
-
- logger.info(f"Conversion completed in {duration:.3f}s")
--- /dev/null
+# Generated by Django 4.0.5 on 2022-06-11 15:40
+import logging
+import multiprocessing.pool
+import shutil
+import tempfile
+import time
+from pathlib import Path
+
+from django.conf import settings
+from django.db import migrations
+from documents.parsers import run_convert
+
+logger = logging.getLogger("paperless.migrations")
+
+
+def _do_convert(work_package):
+    """
+    Convert a single PNG thumbnail to WebP and swap it in for the PNG.
+
+    work_package is a 2-tuple of paths:
+      (existing PNG thumbnail, WebP output location).
+
+    On success the WebP file is copied into the directory holding the PNG
+    and the PNG is deleted. Any exception raised along the way is logged
+    and swallowed, so this function never raises to its caller.
+    """
+    png_path, webp_path = work_package
+    try:
+        logger.info(f"Converting thumbnail: {png_path}")
+
+        # NOTE(review): the "[0]" suffix presumably selects the first
+        # page/frame for the converter - confirm against run_convert
+        first_frame = f"{png_path}[0]"
+
+        run_convert(
+            input_file=first_frame,
+            output_file=str(webp_path),
+            density=300,
+            scale="500x5000>",
+            alpha="remove",
+            strip=True,
+            trim=False,
+            auto_orient=True,
+        )
+
+        # Put the WebP next to the PNG first, and only then drop the PNG
+        thumbnail_dir = png_path.parent
+        shutil.copy(webp_path, thumbnail_dir)
+        png_path.unlink()
+
+        summary = (
+            "Conversion to WebP completed, "
+            f"replaced {png_path.name} with {webp_path.name}"
+        )
+        logger.info(summary)
+
+    except Exception as err:
+        logger.error(f"Error converting thumbnail (existing file unchanged): {err}")
+
+
+def _convert_thumbnails_to_webp(apps, schema_editor):
+ # Forward step of the data migration: finds the PNG thumbnails in
+ # settings.THUMBNAIL_DIR and hands each one to _do_convert.
+ #
+ # apps and schema_editor are the arguments a RunPython callable receives;
+ # neither is used here, the work is done purely on the filesystem.
+ #
+ # NOTE(review): only names matching "*.png" directly inside THUMBNAIL_DIR
+ # are picked up. A file named like "0000001.png.gpg" would not match this
+ # pattern - confirm whether encrypted thumbnails need separate handling.
+ start = time.time()
+
+ # Converted files are first written to a temporary directory;
+ # _do_convert copies them into the thumbnail directory afterwards
+ with tempfile.TemporaryDirectory() as tempdir:
+
+ # List of (existing PNG path, WebP output path) tuples
+ work_packages = []
+
+ for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
+ existing_thumbnail = file.resolve()
+
+ # Change the existing filename suffix from png to webp
+ converted_thumbnail_name = existing_thumbnail.with_suffix(
+ ".webp",
+ ).name
+
+ # Create the expected output filename in the tempdir
+ converted_thumbnail = (
+ Path(tempdir) / Path(converted_thumbnail_name)
+ ).resolve()
+
+ # Package up the necessary info
+ work_packages.append(
+ (existing_thumbnail, converted_thumbnail),
+ )
+
+ # Skip the notice and the worker pool when no PNG thumbnails were found
+ if len(work_packages):
+
+ logger.info(
+ "\n\n"
+ " This is a one-time only migration to convert thumbnails for all of your\n"
+ " documents into WebP format. If you have a lot of documents though, \n"
+ " this may take a while, so a coffee break may be in order."
+ "\n",
+ )
+
+ # At most 4 worker processes, fewer if the machine has fewer CPUs.
+ # Each worker handles up to 4 work packages before being replaced.
+ with multiprocessing.pool.Pool(
+ processes=min(multiprocessing.cpu_count(), 4),
+ maxtasksperchild=4,
+ ) as pool:
+ pool.map(_do_convert, work_packages)
+
+ # Wall-clock time elapsed since this function was entered
+ end = time.time()
+ duration = end - start
+
+ logger.info(f"Conversion completed in {duration:.3f}s")
+
+
+class Migration(migrations.Migration):
+ # Data-only migration: runs the PNG -> WebP thumbnail conversion.
+
+ # Must run after migration 1020_merge_20220518_1839 of the documents app
+ dependencies = [
+ ("documents", "1020_merge_20220518_1839"),
+ ]
+
+ operations = [
+ # Reversing is a no-op: migrating backwards does not turn the WebP
+ # thumbnails back into PNG files
+ migrations.RunPython(
+ code=_convert_thumbnails_to_webp,
+ reverse_code=migrations.RunPython.noop,
+ ),
+ ]
@property
def thumbnail_path(self) -> str:
- png_file_name = f"{self.pk:07}.png"
webp_file_name = f"{self.pk:07}.webp"
if self.storage_type == self.STORAGE_TYPE_GPG:
- png_file_name += ".gpg"
webp_file_name += ".gpg"
- # This property is used to both generate the file path
- # and locate the file itself
- # Hence why this looks a little weird
-
webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
- png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)
-
- # 1. Assume the thumbnail is WebP
- if os.path.exists(png_file_path):
- thumb = png_file_path
- else:
- thumb = webp_file_path
- return os.path.normpath(thumb)
+ return os.path.normpath(webp_file_path)
@property
def thumbnail_file(self):
+++ /dev/null
-import filecmp
-import shutil
-import tempfile
-from io import StringIO
-from pathlib import Path
-from unittest import mock
-
-from django.core.management import call_command
-from django.test import override_settings
-from django.test import TestCase
-from documents.models import Document
-
-
-class TestConvertThumbnails(TestCase):
- def call_command(self):
- stdout = StringIO()
- stderr = StringIO()
- call_command(
- "convert_thumbnails",
- "--no-color",
- stdout=stdout,
- stderr=stderr,
- )
- return stdout.getvalue(), stderr.getvalue()
-
- def setUp(self):
- """
- Creates a document in the database
- """
- super().setUp()
-
- self.doc = Document.objects.create(
- pk=1,
- checksum="A",
- title="A",
- content="first document",
- mime_type="application/pdf",
- )
- self.doc.save()
-
- def pretend_convert_output(self, *args, **kwargs):
- """
- Pretends to do the conversion, by copying the input file
- to the output file
- """
- shutil.copy2(
- Path(kwargs["input_file"].rstrip("[0]")),
- Path(kwargs["output_file"]),
- )
-
- def create_webp_thumbnail_file(self, thumb_dir):
- """
- Creates a dummy WebP thumbnail file in the given directory, based on
- the database Document
- """
- thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.webp")
- thumb_file.write_text("this is a dummy webp file")
- return thumb_file
-
- def create_png_thumbnail_file(self, thumb_dir):
- """
- Creates a dummy PNG thumbnail file in the given directory, based on
- the database Document
- """
- thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.png")
- thumb_file.write_text("this is a dummy png file")
- return thumb_file
-
- @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
- def test_do_nothing_if_converted(self, run_convert_mock):
- """
- GIVEN:
- - Document exists with default WebP thumbnail path
- WHEN:
- - Thumbnail conversion is attempted
- THEN:
- - Nothing is converted
- """
-
- stdout, _ = self.call_command()
- run_convert_mock.assert_not_called()
- self.assertIn("Converting all PNG thumbnails to WebP", stdout)
-
- @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
- def test_convert_single_thumbnail(self, run_convert_mock):
- """
- GIVEN:
- - Document exists with PNG thumbnail
- WHEN:
- - Thumbnail conversion is attempted
- THEN:
- - Single thumbnail is converted
- """
-
- run_convert_mock.side_effect = self.pretend_convert_output
-
- with tempfile.TemporaryDirectory() as thumbnail_dir:
-
- with override_settings(
- THUMBNAIL_DIR=thumbnail_dir,
- ):
-
- thumb_file = self.create_png_thumbnail_file(thumbnail_dir)
-
- stdout, _ = self.call_command()
-
- run_convert_mock.assert_called_once()
- self.assertIn(f"{thumb_file}", stdout)
- self.assertIn("Conversion to WebP completed", stdout)
-
- self.assertFalse(thumb_file.exists())
- self.assertTrue(thumb_file.with_suffix(".webp").exists())
-
- @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
- def test_convert_errors_out(self, run_convert_mock):
- """
- GIVEN:
- - Document exists with PNG thumbnail
- WHEN:
- - Thumbnail conversion is attempted, but raises an exception
- THEN:
- - Single thumbnail is converted
- """
-
- run_convert_mock.side_effect = OSError
-
- with tempfile.TemporaryDirectory() as thumbnail_dir:
-
- with override_settings(
- THUMBNAIL_DIR=thumbnail_dir,
- ):
-
- thumb_file = self.create_png_thumbnail_file(thumbnail_dir)
-
- _, stderr = self.call_command()
-
- run_convert_mock.assert_called_once()
- self.assertIn("Error converting thumbnail", stderr)
- self.assertTrue(thumb_file.exists())
--- /dev/null
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Callable
+from typing import Iterable
+from typing import Union
+from unittest import mock
+
+from django.test import override_settings
+from documents.tests.test_migration_archive_files import thumbnail_path
+from documents.tests.utils import TestMigrations
+
+
+@mock.patch(
+    "documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map",
+)
+@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert")
+class TestMigrateWebPThumbnails(TestMigrations):
+    """
+    Tests for the data migration which converts PNG thumbnails to WebP.
+
+    The class decorators patch run_convert and Pool.map inside the migration
+    module, so every test method receives two mocks as extra arguments:
+    run_convert_mock first, then map_mock.
+    """
+
+    migrate_from = "1020_merge_20220518_1839"
+    migrate_to = "1021_webp_thumbnail_conversion"
+    # Each test triggers the migration itself through performMigration(),
+    # after the thumbnail directory has been prepared
+    auto_migrate = False
+
+    def pretend_convert_output(self, *args, **kwargs):
+        """
+        Pretends to do the conversion, by copying the input file
+        to the output file
+        """
+        input_file = kwargs["input_file"]
+        # The migration appends "[0]" to the input path. Remove exactly that
+        # suffix: str.rstrip("[0]") strips a *set of characters* and would
+        # also eat trailing "0", "[" or "]" characters of the real file name.
+        page_selector = "[0]"
+        if input_file.endswith(page_selector):
+            input_file = input_file[: -len(page_selector)]
+        shutil.copy2(
+            Path(input_file),
+            Path(kwargs["output_file"]),
+        )
+
+    def pretend_map(self, func: Callable, iterable: Iterable):
+        """
+        Pretends to be the map of a multiprocessing.Pool, but secretly does
+        everything in series
+        """
+        for item in iterable:
+            func(item)
+
+    def create_dummy_thumbnails(
+        self,
+        thumb_dir: Path,
+        ext: str,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Helper to create a certain count of empty files of given extension in
+        a given directory. Files are named with a zero padded, 7 digit number,
+        starting at start_count
+        """
+        for idx in range(count):
+            (Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch()
+        # Triple check expected files exist
+        # This counts every file of the extension in the directory, so it
+        # only holds if none existed before this call
+        self.assert_file_count_by_extension(ext, thumb_dir, count)
+
+    def create_webp_thumbnail_files(
+        self,
+        thumb_dir: Path,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Creates count dummy (empty) WebP thumbnail files in the given
+        directory, numbered from start_count
+        """
+        self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count)
+
+    def create_png_thumbnail_file(
+        self,
+        thumb_dir: Path,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Creates count dummy (empty) PNG thumbnail files in the given
+        directory, numbered from start_count
+        """
+        self.create_dummy_thumbnails(thumb_dir, "png", count, start_count)
+
+    def assert_file_count_by_extension(
+        self,
+        ext: str,
+        dir: Union[str, Path],
+        expected_count: int,
+    ):
+        """
+        Helper to assert a certain count of given extension files in given directory
+        """
+        if not isinstance(dir, Path):
+            dir = Path(dir)
+        matching_files = list(dir.glob(f"*.{ext}"))
+        self.assertEqual(len(matching_files), expected_count)
+
+    def assert_png_file_count(self, dir: Path, expected_count: int):
+        """
+        Helper to assert a certain count of PNG extension files in given directory
+        """
+        self.assert_file_count_by_extension("png", dir, expected_count)
+
+    def assert_webp_file_count(self, dir: Path, expected_count: int):
+        """
+        Helper to assert a certain count of WebP extension files in given directory
+        """
+        self.assert_file_count_by_extension("webp", dir, expected_count)
+
+    def setUp(self):
+        # Every test gets its own, initially empty, thumbnail directory
+        self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve()
+
+        return super().setUp()
+
+    def tearDown(self) -> None:
+        # Remove the per-test thumbnail directory created in setUp
+        shutil.rmtree(self.thumbnail_dir)
+
+        return super().tearDown()
+
+    def test_do_nothing_if_converted(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Only WebP thumbnails exist in the thumbnail directory
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Nothing is converted
+            - The WebP thumbnails are still present
+        """
+        map_mock.side_effect = self.pretend_map
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_webp_thumbnail_files(self.thumbnail_dir, 3)
+
+            self.performMigration()
+            run_convert_mock.assert_not_called()
+
+            self.assert_webp_file_count(self.thumbnail_dir, 3)
+
+    def test_convert_single_thumbnail(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Three PNG thumbnails exist in the thumbnail directory
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Every thumbnail is converted
+            - Only WebP thumbnails remain
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = self.pretend_convert_output
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+
+            self.assert_png_file_count(self.thumbnail_dir, 0)
+            self.assert_webp_file_count(self.thumbnail_dir, 3)
+
+    def test_convert_errors_out(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Three PNG thumbnails exist in the thumbnail directory
+        WHEN:
+            - Thumbnail conversion is attempted, but raises an exception
+        THEN:
+            - Conversion is attempted for every thumbnail
+            - The PNG thumbnails are left in place
+            - No WebP thumbnails are created
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = OSError
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+
+            self.assert_png_file_count(self.thumbnail_dir, 3)
+            self.assert_webp_file_count(self.thumbnail_dir, 0)
+
+    def test_convert_mixed(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Three PNG thumbnails and two WebP thumbnails exist in the
+              thumbnail directory
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Only the PNG thumbnails are converted
+            - Only WebP thumbnails remain, five in total
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = self.pretend_convert_output
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+            self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+
+            self.assert_png_file_count(self.thumbnail_dir, 0)
+            self.assert_webp_file_count(self.thumbnail_dir, 5)