archive filenames are now stored in the database and checked for collisions just...
author    jonaswinkler <jonas.winkler@jpwinkler.de>
          Tue, 9 Feb 2021 18:46:19 +0000 (19:46 +0100)
committer jonaswinkler <jonas.winkler@jpwinkler.de>
          Tue, 9 Feb 2021 18:46:19 +0000 (19:46 +0100)
src/documents/consumer.py
src/documents/file_handling.py
src/documents/management/commands/document_archiver.py
src/documents/management/commands/document_exporter.py
src/documents/migrations/1012_fix_archive_files.py
src/documents/models.py
src/documents/sanity_checker.py
src/documents/signals/handlers.py
src/documents/views.py

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 3baedb5c3b6e64abb07b5a5e3252992a0acb315b..acb3ad33f197dd547e0a0919fd4056dc90970447 100755 (executable)
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
                 # After everything is in the database, copy the files into
                 # place. If this fails, we'll also rollback the transaction.
                 with FileLock(settings.MEDIA_LOCK):
-                    document.filename = generate_unique_filename(
-                        document, settings.ORIGINALS_DIR)
+                    document.filename = generate_unique_filename(document)
                     create_source_path_directory(document.source_path)
 
                     self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
                                 thumbnail, document.thumbnail_path)
 
                     if archive_path and os.path.isfile(archive_path):
+                        document.archive_filename = generate_unique_filename(
+                            document,
+                            archive_filename=True
+                        )
                         create_source_path_directory(document.archive_path)
                         self._write(document.storage_type,
                                     archive_path, document.archive_path)
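
Note on the hunk above: generate_unique_filename() no longer takes a root directory; the storage root now follows the archive_filename flag (see the file_handling.py diff below). A minimal sketch of the two call sites, assuming a Document instance already saved inside the open transaction:

# original file: collisions are checked against ORIGINALS_DIR and the
# extension comes from the document's mime type
document.filename = generate_unique_filename(document)

# archive file: collisions are checked against ARCHIVE_DIR and the
# extension is forced to .pdf
document.archive_filename = generate_unique_filename(
    document, archive_filename=True)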
diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py
index 64858de78a95112223132f1b6b4c8ecfde526f5c..535aa3d2c084a37e5bec0315c4d53f0ac024cb0c 100644 (file)
@@ -79,12 +79,20 @@ def many_to_dictionary(field):
     return mydictionary
 
 
-def generate_unique_filename(doc, root):
+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
     counter = 0
 
     while True:
-        new_filename = generate_filename(doc, counter)
-        if new_filename == doc.filename:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
             # still the same as before.
             return new_filename
 
@@ -94,7 +102,7 @@ def generate_unique_filename(doc, root):
             return new_filename
 
 
-def generate_filename(doc, counter=0, append_gpg=True):
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     path = ""
 
     try:
@@ -148,21 +156,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
 
     counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
     if len(path) > 0:
-        filename = f"{path}{counter_str}{doc.file_type}"
+        filename = f"{path}{counter_str}{filetype_str}"
     else:
-        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
 
     # Append .gpg for encrypted files
     if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         filename += ".gpg"
 
     return filename
-
-
-def archive_name_from_filename(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
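
A standalone sketch of the collision loop above, with Django settings and the Document model stubbed out so it runs outside paperless-ngx. All Fake* names are illustration only; the real generate_filename() also renders PAPERLESS_FILENAME_FORMAT, and the real generate_unique_filename() derives root from settings instead of taking it as a parameter:

import os
import tempfile


class FakeDoc:
    # Stand-in for documents.models.Document, reduced to what the loop reads.
    def __init__(self, pk, filename=None, archive_filename=None):
        self.pk = pk
        self.filename = filename
        self.archive_filename = archive_filename


def fake_generate_filename(doc, counter=0, archive_filename=False):
    # Simplified: the real function applies PAPERLESS_FILENAME_FORMAT first.
    counter_str = f"_{counter:02}" if counter else ""
    suffix = ".pdf" if archive_filename else ".txt"
    return f"{doc.pk:07}{counter_str}{suffix}"


def fake_generate_unique_filename(doc, root, archive_filename=False):
    old_filename = doc.archive_filename if archive_filename else doc.filename
    counter = 0
    while True:
        new_filename = fake_generate_filename(
            doc, counter, archive_filename=archive_filename)
        if new_filename == old_filename:
            # The document already owns this name; keep it.
            return new_filename
        if os.path.exists(os.path.join(root, new_filename)):
            counter += 1  # name taken by another file, try the next suffix
        else:
            return new_filename


archive_dir = tempfile.mkdtemp()
# Simulate a collision: another document already owns 0000001.pdf.
open(os.path.join(archive_dir, "0000001.pdf"), "w").close()
print(fake_generate_unique_filename(FakeDoc(pk=1), archive_dir,
                                    archive_filename=True))
# prints 0000001_01.pdf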
diff --git a/src/documents/management/commands/document_archiver.py b/src/documents/management/commands/document_archiver.py
index d2ff9c8c210be1d66cafaf2375692addd9f1c8be..fe8c8b530b374bf31daa3eda35d64febffe93180 100644 (file)
@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter
 
 from documents.models import Document
 from ... import index
-from ...file_handling import create_source_path_directory
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type
 
 
@@ -39,13 +40,16 @@ def handle_document(document_id):
             with transaction.atomic():
                 with open(parser.get_archive_path(), 'rb') as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
-                # i'm going to save first so that in case the file move
+                # I'm going to save first so that in case the file move
                 # fails, the database is rolled back.
-                # we also don't use save() since that triggers the filehandling
+                # We also don't use save() since that triggers the filehandling
                 # logic, and we don't want that yet (file not yet in place)
+                document.archive_filename = generate_unique_filename(
+                    document, archive_filename=True)
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,
-                    content=parser.get_text()
+                    content=parser.get_text(),
+                    archive_filename=document.archive_filename
                 )
                 with FileLock(settings.MEDIA_LOCK):
                     create_source_path_directory(document.archive_path)
@@ -101,7 +105,7 @@ class Command(BaseCommand):
         document_ids = list(map(
             lambda doc: doc.id,
             filter(
-                lambda d: overwrite or not d.archive_checksum,
+                lambda d: overwrite or not d.has_archive_version,
                 documents
             )
         ))
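
Why the archiver writes through a queryset update() instead of document.save(): save() triggers the file-handling signal handler (update_filename_and_move_files() in signals/handlers.py) and would try to move an archive file that is not on disk yet, while a bare queryset update() writes the columns without firing any signals. A sketch of the distinction, using the names from the hunk above:

# triggers the post-save file-handling logic -> premature file moves
document.save()

# writes the columns directly; the file can be moved into place afterwards
Document.objects.filter(pk=document.pk).update(
    archive_checksum=checksum,
    content=parser.get_text(),
    archive_filename=document.archive_filename,
)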
diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py
index 1505b085695814248c0905f15c60d05e9a8d849b..d8f5861a52a48955720e911eaa24b24087a4c42d 100644 (file)
@@ -139,7 +139,7 @@ class Command(BaseCommand):
             thumbnail_target = os.path.join(self.target, thumbnail_name)
             document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
 
-            if os.path.exists(document.archive_path):
+            if document.has_archive_version:
                 archive_name = base_name + "-archive.pdf"
                 archive_target = os.path.join(self.target, archive_name)
                 document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
diff --git a/src/documents/migrations/1012_fix_archive_files.py b/src/documents/migrations/1012_fix_archive_files.py
index f75ae431d26778c331686becf8092c134aea0033..5db5033917aa216743296c8b21473b0166f917c8 100644 (file)
@@ -1,23 +1,27 @@
 # Generated by Django 3.1.6 on 2021-02-07 22:26
+import datetime
 import hashlib
 import logging
 import os
 import shutil
 
+import pathvalidate
 from django.conf import settings
-from django.db import migrations
+from django.db import migrations, models
+from django.template.defaultfilters import slugify
 
+from documents.file_handling import defaultdictNoStr, many_to_dictionary
 
 logger = logging.getLogger("paperless.migrations")
 
 
-def archive_name_from_filename_old(filename):
+def archive_name_from_filename(filename):
     return os.path.splitext(filename)[0] + ".pdf"
 
 
 def archive_path_old(doc):
     if doc.filename:
-        fname = archive_name_from_filename_old(doc.filename)
+        fname = archive_name_from_filename(doc.filename)
     else:
         fname = "{:07}.pdf".format(doc.pk)
 
@@ -27,27 +31,17 @@ def archive_path_old(doc):
     )
 
 
-def archive_name_from_filename_new(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
+STORAGE_TYPE_GPG = "gpg"
 
 
 def archive_path_new(doc):
-    if doc.filename:
-        fname = archive_name_from_filename_new(doc.filename)
+    if doc.archive_filename is not None:
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            str(doc.archive_filename)
+        )
     else:
-        fname = "{:07}.pdf".format(doc.pk)
-
-    return os.path.join(
-        settings.ARCHIVE_DIR,
-        fname
-    )
-
-
-STORAGE_TYPE_GPG = "gpg"
+        return None
 
 
 def source_path(doc):
@@ -64,6 +58,98 @@ def source_path(doc):
     )
 
 
+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
+    path = ""
+
+    try:
+        if settings.PAPERLESS_FILENAME_FORMAT is not None:
+            tags = defaultdictNoStr(lambda: slugify(None),
+                                    many_to_dictionary(doc.tags))
+
+            tag_list = pathvalidate.sanitize_filename(
+                ",".join(sorted(
+                    [tag.name for tag in doc.tags.all()]
+                )),
+                replacement_text="-"
+            )
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
+            path = settings.PAPERLESS_FILENAME_FORMAT.format(
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
+                created_year=doc.created.year if doc.created else "none",
+                created_month=f"{doc.created.month:02}" if doc.created else "none",  # NOQA: E501
+                created_day=f"{doc.created.day:02}" if doc.created else "none",
+                added=datetime.date.isoformat(doc.added),
+                added_year=doc.added.year if doc.added else "none",
+                added_month=f"{doc.added.month:02}" if doc.added else "none",
+                added_day=f"{doc.added.day:02}" if doc.added else "none",
+                tags=tags,
+                tag_list=tag_list
+            ).strip()
+
+            path = path.strip(os.sep)
+
+    except (ValueError, KeyError, IndexError):
+        logger.warning(
+            f"Invalid PAPERLESS_FILENAME_FORMAT: "
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
+
+    counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
+    if len(path) > 0:
+        filename = f"{path}{counter_str}{filetype_str}"
+    else:
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
+
+    # Append .gpg for encrypted files
+    if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
+        filename += ".gpg"
+
+    return filename
+
+
 def move_old_to_new_locations(apps, schema_editor):
     Document = apps.get_model("documents", "Document")
 
@@ -74,18 +160,12 @@ def move_old_to_new_locations(apps, schema_editor):
     # check for documents that have incorrect archive versions
     for doc in Document.objects.filter(archive_checksum__isnull=False):
         old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)
 
         if not os.path.isfile(old_path):
             raise ValueError(
                 f"Archived document of {doc.filename} does not exist at: "
                 f"{old_path}")
 
-        if old_path != new_path and os.path.isfile(new_path):
-            raise ValueError(
-                f"Need to move {old_path} to {new_path}, but target file "
-                f"already exists")
-
         if old_path in old_archive_path_to_id:
             affected_document_ids.add(doc.id)
             affected_document_ids.add(old_archive_path_to_id[old_path])
@@ -103,22 +183,19 @@ def move_old_to_new_locations(apps, schema_editor):
                 f"document {doc.filename} has an invalid archived document, "
                 f"but no parsers are available. Cannot migrate.")
 
-    # move files
     for doc in Document.objects.filter(archive_checksum__isnull=False):
-        old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)
 
         if doc.id in affected_document_ids:
+            old_path = archive_path_old(doc)
             # remove affected archive versions
             if os.path.isfile(old_path):
                 os.unlink(old_path)
         else:
-            # move unaffected archive versions
-            if old_path != new_path and os.path.isfile(old_path) and not os.path.isfile(new_path):
-                logger.debug(
-                    f"Moving {old_path} to {new_path}"
-                )
-                shutil.move(old_path, new_path)
+            # Set archive path for unaffected files
+            doc.archive_filename = archive_path_old(doc)
+            Document.objects.filter(id=doc.id).update(
+                archive_filename=doc.archive_filename
+            )
 
     # regenerate archive documents
     for doc_id in affected_document_ids:
@@ -135,14 +212,16 @@ def move_old_to_new_locations(apps, schema_editor):
         try:
             parser.parse(source_path(doc), doc.mime_type, os.path.basename(doc.filename))
             doc.content = parser.get_text()
-            if parser.archive_path and os.path.isfile(parser.archive_path):
-                with open(parser.archive_path, "rb") as f:
+
+            if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
+                doc.archive_filename = generate_unique_filename(
+                    doc, archive_filename=True)
+                with open(parser.get_archive_path(), "rb") as f:
                     doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
-                shutil.copy2(parser.archive_path, archive_path_new(doc))
+                os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
+                shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
             else:
                 doc.archive_checksum = None
-                if os.path.isfile(archive_path_new(doc)):
-                    os.unlink(archive_path_new(doc))
             doc.save()
         except ParseError:
             logger.exception(
@@ -187,8 +266,18 @@ class Migration(migrations.Migration):
     ]
 
     operations = [
+        migrations.AddField(
+            model_name='document',
+            name='archive_filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
+        ),
         migrations.RunPython(
             move_old_to_new_locations,
             move_new_to_old_locations
-        )
+        ),
     ]
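
A note on the structure of this migration: the schema operations (AddField/AlterField) run before RunPython, so the data step already sees the new archive_filename column, and the module carries frozen copies of generate_filename()/generate_unique_filename() so later refactors of documents/file_handling.py cannot change what the migration does. The skeleton of that standard Django data-migration pattern, schematically:

from django.db import migrations


def forwards(apps, schema_editor):
    # apps.get_model() yields the historical model state, which is also
    # why the helpers are copied into the module instead of imported live
    Document = apps.get_model("documents", "Document")
    for doc in Document.objects.filter(archive_checksum__isnull=False):
        ...  # work with the frozen helpers defined alongside


class Migration(migrations.Migration):
    dependencies = []  # the preceding documents migration goes here
    operations = [
        # AddField/AlterField entries go first, then the data step
        migrations.RunPython(forwards, migrations.RunPython.noop),
    ]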
diff --git a/src/documents/models.py b/src/documents/models.py
index 86878dd7ebe4754150946f2df9f1fa3bcbc45b18..47433724a36f2a2aea4ab0467a44769fcd8908e2 100755 (executable)
@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware
 
 from django.utils.translation import gettext_lazy as _
 
-from documents.file_handling import archive_name_from_filename
 from documents.parsers import get_default_file_extension
 
 
@@ -208,10 +207,21 @@ class Document(models.Model):
         max_length=1024,
         editable=False,
         default=None,
+        unique=True,
         null=True,
         help_text=_("Current filename in storage")
     )
 
+    archive_filename = models.FilePathField(
+        _("archive filename"),
+        max_length=1024,
+        editable=False,
+        default=None,
+        unique=True,
+        null=True,
+        help_text=_("Current archive filename in storage")
+    )
+
     archive_serial_number = models.IntegerField(
         _("archive serial number"),
         blank=True,
@@ -255,17 +265,20 @@ class Document(models.Model):
     def source_file(self):
         return open(self.source_path, "rb")
 
+    @property
+    def has_archive_version(self):
+        return self.archive_filename is not None
+
     @property
     def archive_path(self):
-        if self.filename:
-            fname = archive_name_from_filename(self.filename)
+        if self.has_archive_version:
+            return os.path.join(
+                settings.ARCHIVE_DIR,
+                str(self.archive_filename)
+            )
         else:
-            fname = "{:07}.pdf".format(self.pk)
+            return None
 
-        return os.path.join(
-            settings.ARCHIVE_DIR,
-            fname
-        )
 
     @property
     def archive_file(self):
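
Since archive_path can now be None, every caller has to gate on has_archive_version first, as the sanity_checker.py and views.py diffs below do. A minimal usage sketch (doc is any Document instance):

if doc.has_archive_version:
    # archive_filename is set, so archive_path points into ARCHIVE_DIR
    size = os.stat(doc.archive_path).st_size
else:
    size = None  # no archived version stored for this document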
diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py
index b8fd73f9808e27ba036a6e488645843ec4f46047..0b385e81bf7c41102735e3139b7cb75a169c9b43 100644 (file)
@@ -88,7 +88,7 @@ def check_sanity():
                     ))
 
         # Check sanity of the archive file.
-        if doc.archive_checksum:
+        if doc.has_archive_version:
             if not os.path.isfile(doc.archive_path):
                 messages.append(SanityError(
                     f"Archived version of document {doc.pk} does not exist."
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index 848bcb90010059b74d35fe1f0355ca34df733043..5d46dc431de2944e5a002d03f5abedb84ad3e46b 100755 (executable)
@@ -14,7 +14,7 @@ from filelock import FileLock
 
 from .. import index, matching
 from ..file_handling import delete_empty_directories, \
-    create_source_path_directory, archive_name_from_filename, \
+    create_source_path_directory, \
     generate_unique_filename
 from ..models import Document, Tag
 
@@ -148,18 +148,18 @@ def set_tags(sender,
 @receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
     with FileLock(settings.MEDIA_LOCK):
-        for f in (instance.source_path,
-                  instance.archive_path,
-                  instance.thumbnail_path):
-            if os.path.isfile(f):
+        for filename in (instance.source_path,
+                         instance.archive_path,
+                         instance.thumbnail_path):
+            if filename and os.path.isfile(filename):
                 try:
-                    os.unlink(f)
+                    os.unlink(filename)
                     logger.debug(
-                        f"Deleted file {f}.")
+                        f"Deleted file {filename}.")
                 except OSError as e:
                     logger.warning(
                         f"While deleting document {str(instance)}, the file "
-                        f"{f} could not be deleted: {e}"
+                        f"{filename} could not be deleted: {e}"
                     )
 
         delete_empty_directories(
@@ -167,10 +167,11 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
             root=settings.ORIGINALS_DIR
         )
 
-        delete_empty_directories(
-            os.path.dirname(instance.archive_path),
-            root=settings.ARCHIVE_DIR
-        )
+        if instance.has_archive_version:
+            delete_empty_directories(
+                os.path.dirname(instance.archive_path),
+                root=settings.ARCHIVE_DIR
+            )
 
 
 def validate_move(instance, old_path, new_path):
@@ -207,8 +208,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
 
     with FileLock(settings.MEDIA_LOCK):
         old_filename = instance.filename
-        new_filename = generate_unique_filename(
-            instance, settings.ORIGINALS_DIR)
+        new_filename = generate_unique_filename(instance)
 
         if new_filename == instance.filename:
             # Don't do anything if its the same.
@@ -222,8 +222,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
 
         # archive files are optional, archive checksum tells us if we have one,
         # since this is None for documents without archived files.
-        if instance.archive_checksum:
-            new_archive_filename = archive_name_from_filename(new_filename)
+        if instance.has_archive_version:
+            old_archive_filename = instance.archive_filename
+            new_archive_filename = generate_unique_filename(
+                instance, archive_filename=True
+            )
             old_archive_path = instance.archive_path
             new_archive_path = os.path.join(settings.ARCHIVE_DIR,
                                             new_archive_filename)
@@ -233,6 +236,8 @@ def update_filename_and_move_files(sender, instance, **kwargs):
 
             create_source_path_directory(new_archive_path)
         else:
+            old_archive_filename = None
+            new_archive_filename = None
             old_archive_path = None
             new_archive_path = None
 
@@ -240,22 +245,28 @@ def update_filename_and_move_files(sender, instance, **kwargs):
 
         try:
             os.rename(old_source_path, new_source_path)
-            if instance.archive_checksum:
-                os.rename(old_archive_path, new_archive_path)
             instance.filename = new_filename
 
+            if instance.has_archive_version:
+                os.rename(old_archive_path, new_archive_path)
+                instance.archive_filename = new_archive_filename
+
             # Don't save() here to prevent infinite recursion.
             Document.objects.filter(pk=instance.pk).update(
-                filename=new_filename)
+                filename=instance.filename,
+                archive_filename=instance.archive_filename,
+            )
 
         except OSError as e:
             instance.filename = old_filename
+            instance.archive_filename = old_archive_filename
             # this happens when we can't move a file. If that's the case for
             # the archive file, we try our best to revert the changes.
             # no need to save the instance, the update() has not happened yet.
             try:
                 os.rename(new_source_path, old_source_path)
-                os.rename(new_archive_path, old_archive_path)
+                if instance.has_archive_version:
+                    os.rename(new_archive_path, old_archive_path)
             except Exception as e:
                 # This is fine, since:
                 # A: if we managed to move source from A to B, we will also
@@ -271,9 +282,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
             # since moving them once succeeded, it's very likely going to
             # succeed again.
             os.rename(new_source_path, old_source_path)
-            if instance.archive_checksum:
+            if instance.has_archive_version:
                 os.rename(new_archive_path, old_archive_path)
             instance.filename = old_filename
+            instance.archive_filename = old_archive_filename
             # again, no need to save the instance, since the actual update()
             # operation failed.
 
@@ -283,7 +295,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
             delete_empty_directories(os.path.dirname(old_source_path),
                                      root=settings.ORIGINALS_DIR)
 
-        if old_archive_path and not os.path.isfile(old_archive_path):
+        if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
             delete_empty_directories(os.path.dirname(old_archive_path),
                                      root=settings.ARCHIVE_DIR)
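
The rename logic above reduces to a move-with-rollback pattern: move both files, write filename and archive_filename in a single update(), and if anything fails, rename whatever already moved back to its old place so the database (which was never updated) still matches the filesystem. A sketch of that core, with an illustrative function name that is not part of the codebase:

import os


def move_pair(old_source, new_source, old_archive, new_archive):
    os.rename(old_source, new_source)
    try:
        if old_archive is not None:
            os.rename(old_archive, new_archive)
        # ...single queryset update() writing both columns goes here...
    except OSError:
        # best-effort rollback: put the source back so the filesystem
        # still matches the database, which was never updated
        os.rename(new_source, old_source)
        raise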
 
diff --git a/src/documents/views.py b/src/documents/views.py
index 5ab4ca9dfe470f500687e296616e8230d6594e22..3a840567db727a24a09cb2d2fc057961ebee5365 100755 (executable)
@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,
 
     def file_response(self, pk, request, disposition):
         doc = Document.objects.get(id=pk)
-        if not self.original_requested(request) and os.path.isfile(doc.archive_path):  # NOQA: E501
+        if not self.original_requested(request) and doc.has_archive_version:  # NOQA: E501
             file_handle = doc.archive_file
             filename = doc.get_public_filename(archive=True)
             mime_type = 'application/pdf'
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
             "original_size": os.stat(doc.source_path).st_size,
             "original_mime_type": doc.mime_type,
             "media_filename": doc.filename,
-            "has_archive_version": os.path.isfile(doc.archive_path),
+            "has_archive_version": doc.has_archive_version,
             "original_metadata": self.get_metadata(
-                doc.source_path, doc.mime_type)
+                doc.source_path, doc.mime_type),
+            "archive_checksum": doc.archive_checksum,
+            "archive_media_filename": doc.archive_filename
         }
 
-        if doc.archive_checksum and os.path.isfile(doc.archive_path):
-            meta['archive_checksum'] = doc.archive_checksum
+        if doc.has_archive_version:
             meta['archive_size'] = os.stat(doc.archive_path).st_size,
             meta['archive_metadata'] = self.get_metadata(
                 doc.archive_path, "application/pdf")
         else:
-            meta['archive_checksum'] = None
             meta['archive_size'] = None
             meta['archive_metadata'] = None
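
The resulting metadata payload, sketched with illustrative values: archive_checksum and archive_media_filename are now emitted unconditionally (both None for documents without an archived version), while archive_size and archive_metadata stay gated on has_archive_version.

{
    "original_size": 31337,
    "original_mime_type": "application/pdf",
    "media_filename": "0000001.pdf",
    "has_archive_version": False,
    "original_metadata": [],
    "archive_checksum": None,        # no longer gated on a file check
    "archive_media_filename": None,
    "archive_size": None,
    "archive_metadata": None,
}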