git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fixes issues with copy2 or copystat and SELinux; see #3665
author Trenton H <797416+stumpylog@users.noreply.github.com>
Fri, 21 Jul 2023 21:19:26 +0000 (14:19 -0700)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Sat, 22 Jul 2023 13:27:49 +0000 (06:27 -0700)
src/documents/barcodes.py
src/documents/consumer.py
src/documents/management/commands/document_exporter.py
src/documents/management/commands/document_importer.py
src/documents/parsers.py
src/documents/tests/test_management_exporter.py
src/documents/utils.py [new file with mode: 0644]

index 3650593ae80866ad26b31e6987671481b05c0265..cabc195b3956bcb862e9aa90dd7b022b9902618c 100644 (file)
@@ -1,5 +1,4 @@
 import logging
-import shutil
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
@@ -18,6 +17,8 @@ from pikepdf import Pdf
 from PIL import Image
 
 from documents.data_models import DocumentSource
+from documents.utils import copy_basic_file_stats
+from documents.utils import copy_file_with_basic_stats
 
 logger = logging.getLogger("paperless.barcodes")
 
@@ -181,7 +182,7 @@ class BarcodeReader:
             pdf_file.write(img2pdf.convert(img_file))
 
         # Copy what file stat is possible
-        shutil.copystat(self.file, self.pdf_file)
+        copy_basic_file_stats(self.file, self.pdf_file)
 
     def detect(self) -> None:
         """
@@ -306,7 +307,7 @@ class BarcodeReader:
                 with open(savepath, "wb") as out:
                     dst.save(out)
 
-                shutil.copystat(self.file, savepath)
+                copy_basic_file_stats(self.file, savepath)
 
                 document_paths.append(savepath)
 
@@ -363,5 +364,5 @@ class BarcodeReader:
             else:
                 dest = save_to_dir
             logger.info(f"Saving {document_path} to {dest}")
-            shutil.copy2(document_path, dest)
+            copy_file_with_basic_stats(document_path, dest)
         return True
index fde8e2d4c23ccc716a41f1a1bdaa25f58159772c..c2669c00a247f082c9050475bacc0bc442b34226 100644 (file)
@@ -1,7 +1,6 @@
 import datetime
 import hashlib
 import os
-import shutil
 import tempfile
 import uuid
 from pathlib import Path
@@ -21,6 +20,9 @@ from django.utils import timezone
 from filelock import FileLock
 from rest_framework.reverse import reverse
 
+from documents.utils import copy_basic_file_stats
+from documents.utils import copy_file_with_basic_stats
+
 from .classifier import load_classifier
 from .file_handling import create_source_path_directory
 from .file_handling import generate_unique_filename
@@ -326,7 +328,7 @@ class Consumer(LoggingMixin):
             dir=settings.SCRATCH_DIR,
         )
         self.path = Path(tempdir.name) / Path(self.filename)
-        shutil.copy2(self.original_path, self.path)
+        copy_file_with_basic_stats(self.original_path, self.path)
 
         # Determine the parser class.
 
@@ -585,7 +587,7 @@ class Consumer(LoggingMixin):
 
         # Attempt to copy file's original stats, but it's ok if we can't
         try:
-            shutil.copystat(source, target)
+            copy_basic_file_stats(source, target)
         except Exception:  # pragma: no cover
             pass
 
index 22fb5930878e26b98fb9c76afb52d1d44887c437..9484d86bb3bef377058a4b31fca84666400538bd 100644 (file)
@@ -37,6 +37,7 @@ from documents.models import UiSettings
 from documents.settings import EXPORTER_ARCHIVE_NAME
 from documents.settings import EXPORTER_FILE_NAME
 from documents.settings import EXPORTER_THUMBNAIL_NAME
+from documents.utils import copy_file_with_basic_stats
 from paperless import version
 from paperless.db import GnuPG
 from paperless_mail.models import MailAccount
@@ -437,4 +438,4 @@ class Command(BaseCommand):
 
         if perform_copy:
             target.parent.mkdir(parents=True, exist_ok=True)
-            shutil.copy2(source, target)
+            copy_file_with_basic_stats(source, target)
index baf6d75285a1a64f6a936952973446bbdb12bc2f..eac967dde343cc7ab54944060079a3ca348a7da3 100644 (file)
@@ -1,7 +1,6 @@
 import json
 import logging
 import os
-import shutil
 from contextlib import contextmanager
 from pathlib import Path
 
@@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
 from documents.settings import EXPORTER_FILE_NAME
 from documents.settings import EXPORTER_THUMBNAIL_NAME
 from documents.signals.handlers import update_filename_and_move_files
+from documents.utils import copy_file_with_basic_stats
 from paperless import version
 
 
@@ -246,7 +246,7 @@ class Command(BaseCommand):
 
                 create_source_path_directory(document.source_path)
 
-                shutil.copy2(document_path, document.source_path)
+                copy_file_with_basic_stats(document_path, document.source_path)
 
                 if thumbnail_path:
                     if thumbnail_path.suffix in {".png", ".PNG"}:
@@ -261,13 +261,16 @@ class Command(BaseCommand):
                             output_file=str(document.thumbnail_path),
                         )
                     else:
-                        shutil.copy2(thumbnail_path, document.thumbnail_path)
+                        copy_file_with_basic_stats(
+                            thumbnail_path,
+                            document.thumbnail_path,
+                        )
 
                 if archive_path:
                     create_source_path_directory(document.archive_path)
                     # TODO: this assumes that the export is valid and
                     #  archive_filename is present on all documents with
                     #  archived files
-                    shutil.copy2(archive_path, document.archive_path)
+                    copy_file_with_basic_stats(archive_path, document.archive_path)
 
             document.save()
index e1d7365fbf8478e893328c79d44f7596038add3b..cdf681398e477ffb6f294c65f9e5d2d198feadcc 100644 (file)
@@ -18,6 +18,7 @@ from django.utils import timezone
 
 from documents.loggers import LoggingMixin
 from documents.signals import document_consumer_declaration
+from documents.utils import copy_file_with_basic_stats
 
 # This regular expression will try to find dates in the document at
 # hand and will match the following formats:
@@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
         # so we need to copy it before it gets moved.
         # https://github.com/paperless-ngx/paperless-ngx/issues/3631
         default_thumbnail_path = os.path.join(temp_dir, "document.png")
-        shutil.copy2(get_default_thumbnail(), default_thumbnail_path)
+        copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
         return default_thumbnail_path
 
 
index 421ae51fca96c9360940f6e09b8d32b092f8fa80..4da93ee5086ecc090800002fcf44a99421ae7e94 100644 (file)
@@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
 
         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2",
+            "documents.management.commands.document_exporter.copy_file_with_basic_stats",
         ) as m:
             self._do_export()
             m.assert_not_called()
@@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         Path(self.d1.source_path).touch()
 
         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2",
+            "documents.management.commands.document_exporter.copy_file_with_basic_stats",
         ) as m:
             self._do_export()
             self.assertEqual(m.call_count, 1)
@@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         self.assertIsFile(os.path.join(self.target, "manifest.json"))
 
         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2",
+            "documents.management.commands.document_exporter.copy_file_with_basic_stats",
         ) as m:
             self._do_export()
             m.assert_not_called()
@@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         self.d2.save()
 
         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2",
+            "documents.management.commands.document_exporter.copy_file_with_basic_stats",
         ) as m:
             self._do_export(compare_checksums=True)
             self.assertEqual(m.call_count, 1)
diff --git a/src/documents/utils.py b/src/documents/utils.py
new file mode 100644 (file)
index 0000000..45496fc
--- /dev/null
@@ -0,0 +1,43 @@
+import shutil
+from os import utime
+from pathlib import Path
+from typing import Tuple
+from typing import Union
+
+
+def _coerce_to_path(
+    source: Union[Path, str],
+    dest: Union[Path, str],
+) -> Tuple[Path, Path]:
+    return Path(source).resolve(), Path(dest).resolve()  # absolute, no symlinks
+
+
+def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
+    """
+    Applies the access and modification times of source to dest, using the
+    nanosecond stat fields. Both files are expected to exist already.
+
+    shutil.copystat is avoided because it also copies extended attributes,
+    cannot be told to skip them, and that copy misbehaves with SELinux for
+    files copied from temporary directories.
+    """
+    src_path, dest_path = _coerce_to_path(source, dest)
+    times = src_path.stat()
+    utime(dest_path, ns=(times.st_atime_ns, times.st_mtime_ns))
+
+
+def copy_file_with_basic_stats(
+    source: Union[Path, str],
+    dest: Union[Path, str],
+) -> None:
+    """
+    A simpler copy2: copies the file contents and permission bits, then only
+    the access and modified times, skipping the extended attributes which do
+    weird things with SELinux and files copied from temporary directories.
+
+    dest may be a directory; the times then go on the file created inside it.
+    """
+    source, dest = _coerce_to_path(source, dest)
+
+    # shutil.copy returns the created file, which is not dest for a directory
+    copy_basic_file_stats(source, shutil.copy(source, dest))