lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/documents/migrations/0012_auto_20160305_0040.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/documents/migrations/0014_document_checksum.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/documents/migrations/1003_mime_types.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/models.py" = [
- "PTH",
"SIM115",
-] # TODO PTH Enable & remove
+]
lint.per-file-ignores."src/documents/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/signals/handlers.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tasks.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tests/test_api_app_config.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tests/test_classifier.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_consumer.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tests/test_management_thumbnails.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tests/test_tasks.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/documents/tests/test_views.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/documents/views.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/settings.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/paperless/tests/test_checks.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/paperless/urls.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/paperless/views.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_mail/mail.py" = [
"PTH",
] # TODO Enable & remove
-lint.per-file-ignores."src/paperless_mail/preprocessor.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/paperless_tesseract/parsers.py" = [
- "PTH",
-] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
"PTH",
"RUF001",
] # TODO PTH Enable & remove
-lint.per-file-ignores."src/paperless_tika/tests/test_live_tika.py" = [
- "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/paperless_tika/tests/test_tika_parser.py" = [
- "PTH",
-] # TODO Enable & remove
lint.isort.force-single-line = true
[tool.pytest.ini_options]
import shutil
import subprocess
import tempfile
+from pathlib import Path
import gnupg
from django.conf import settings
def move_documents_and_create_thumbnails(apps, schema_editor):
- os.makedirs(
- os.path.join(settings.MEDIA_ROOT, "documents", "originals"),
+ (Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
+ parents=True,
exist_ok=True,
)
- os.makedirs(
- os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"),
+ (Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
+ parents=True,
exist_ok=True,
)
- documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
+ documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents")
if set(documents) == {"originals", "thumbnails"}:
return
),
)
- try:
- os.makedirs(settings.SCRATCH_DIR)
- except FileExistsError:
- pass
+ Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
for f in sorted(documents):
if not f.endswith("gpg"):
),
)
- thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
- orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
+ thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
+ orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
- orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
- orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
+ orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
+ orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
- with open(orig_source, "rb") as encrypted:
- with open(orig_target, "wb") as unencrypted:
- unencrypted.write(GnuPG.decrypted(encrypted))
+ with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
+ unencrypted.write(GnuPG.decrypted(encrypted))
subprocess.Popen(
(
"-alpha",
"remove",
orig_target,
- os.path.join(thumb_temp, "convert-%04d.png"),
+ Path(thumb_temp) / "convert-%04d.png",
),
).wait()
- thumb_source = os.path.join(thumb_temp, "convert-0000.png")
- thumb_target = os.path.join(
- settings.MEDIA_ROOT,
- "documents",
- "thumbnails",
- re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f),
+ thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
+ thumb_target: Path = (
+ Path(settings.MEDIA_ROOT)
+ / "documents"
+ / "thumbnails"
+ / re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
)
- with open(thumb_source, "rb") as unencrypted:
- with open(thumb_target, "wb") as encrypted:
- encrypted.write(GnuPG.encrypted(unencrypted))
+ with (
+ thumb_source.open("rb") as unencrypted,
+ thumb_target.open("wb") as encrypted,
+ ):
+ encrypted.write(GnuPG.encrypted(unencrypted))
shutil.rmtree(thumb_temp)
shutil.rmtree(orig_temp)
shutil.move(
- os.path.join(settings.MEDIA_ROOT, "documents", f),
- os.path.join(settings.MEDIA_ROOT, "documents", "originals", f),
+ Path(settings.MEDIA_ROOT) / "documents" / f,
+ Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
)
# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
-import os
+from pathlib import Path
import django.utils.timezone
import gnupg
@property
def source_path(self):
- return os.path.join(
- settings.MEDIA_ROOT,
- "documents",
- "originals",
- f"{self.pk:07}.{self.file_type}.gpg",
- )
+ return (
+ Path(settings.MEDIA_ROOT)
+ / "documents"
+ / "originals"
+ / f"{self.pk:07}.{self.file_type}.gpg"
+ ).as_posix()
@property
def source_file(self):
- return open(self.source_path, "rb")
+ return Path(self.source_path).open("rb")
@property
def file_name(self):
# Generated by Django 3.1.3 on 2020-11-20 11:21
-import os
+from pathlib import Path
import magic
from django.conf import settings
STORAGE_TYPE_GPG = "gpg"
-def source_path(self):
+def source_path(self) -> Path:
if self.filename:
- fname = str(self.filename)
+ fname: str = str(self.filename)
else:
fname = f"{self.pk:07}.{self.file_type}"
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
- return os.path.join(settings.ORIGINALS_DIR, fname)
+ return Path(settings.ORIGINALS_DIR) / fname
def add_mime_types(apps, schema_editor):
documents = Document.objects.all()
for d in documents:
- f = open(source_path(d), "rb")
- if d.storage_type == STORAGE_TYPE_GPG:
- data = GnuPG.decrypted(f)
- else:
- data = f.read(1024)
+ with Path(source_path(d)).open("rb") as f:
+ if d.storage_type == STORAGE_TYPE_GPG:
+ data = GnuPG.decrypted(f)
+ else:
+ data = f.read(1024)
d.mime_type = magic.from_buffer(data, mime=True)
d.save()
- f.close()
-
def add_file_extensions(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
for d in documents:
- d.file_type = os.path.splitext(d.filename)[1].strip(".")
+ d.file_type = Path(d.filename).suffix.lstrip(".")
d.save()
@property
def source_file(self):
- return open(self.source_path, "rb")
+ return Path(self.source_path).open("rb")
@property
def has_archive_version(self) -> bool:
@property
def archive_file(self):
- return open(self.archive_path, "rb")
+ return Path(self.archive_path).open("rb")
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
"""
@property
def thumbnail_file(self):
- return open(self.thumbnail_path, "rb")
+ return Path(self.thumbnail_path).open("rb")
@property
def created_date(self):
with transaction.atomic():
oldDocument = Document.objects.get(pk=document.pk)
if parser.get_archive_path():
- with open(parser.get_archive_path(), "rb") as f:
+ with Path(parser.get_archive_path()).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
import json
-import os
+from pathlib import Path
from django.contrib.auth.models import User
from rest_framework import status
THEN:
- old app_logo file is deleted
"""
- with open(
- os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"),
- "rb",
- ) as f:
+ with (Path(__file__).parent / "samples" / "simple.jpg").open("rb") as f:
self.client.patch(
f"{self.ENDPOINT}1/",
{
)
config = ApplicationConfiguration.objects.first()
old_logo = config.app_logo
- self.assertTrue(os.path.exists(old_logo.path))
- with open(
- os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
- "rb",
- ) as f:
+ self.assertTrue(Path(old_logo.path).exists())
+ with (Path(__file__).parent / "samples" / "simple.png").open("rb") as f:
self.client.patch(
f"{self.ENDPOINT}1/",
{
"app_logo": f,
},
)
- self.assertFalse(os.path.exists(old_logo.path))
+ self.assertFalse(Path(old_logo.path).exists())
-import os
import re
import shutil
from pathlib import Path
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
def test_load_classifier_not_exists(self):
- self.assertFalse(os.path.exists(settings.MODEL_FILE))
+ self.assertFalse(Path(settings.MODEL_FILE).exists())
self.assertIsNone(load_classifier())
@mock.patch("documents.classifier.DocumentClassifier.load")
},
)
@override_settings(
- MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
+ MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(),
)
@pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate.",
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch()
- self.assertTrue(os.path.exists(settings.MODEL_FILE))
+ self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
self.assertIsNone(load_classifier())
- self.assertFalse(os.path.exists(settings.MODEL_FILE))
+ self.assertFalse(Path(settings.MODEL_FILE).exists())
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch()
- self.assertTrue(os.path.exists(settings.MODEL_FILE))
+ self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = OSError()
self.assertIsNone(load_classifier())
- self.assertTrue(os.path.exists(settings.MODEL_FILE))
+ self.assertTrue(Path(settings.MODEL_FILE).exists())
def test_load_old_classifier_version(self):
shutil.copy(
- os.path.join(os.path.dirname(__file__), "data", "v1.17.4.model.pickle"),
+ Path(__file__).parent / "data" / "v1.17.4.model.pickle",
self.dirs.scratch_dir,
)
with override_settings(
-import os
import shutil
+from pathlib import Path
from unittest import mock
from django.core.management import call_command
filename="test.pdf",
)
shutil.copy(
- os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+ Path(__file__).parent / "samples" / "simple.pdf",
self.d1.source_path,
)
filename="test2.pdf",
)
shutil.copy(
- os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+ Path(__file__).parent / "samples" / "simple.pdf",
self.d2.source_path,
)
filename="test3.pdf",
)
shutil.copy(
- os.path.join(os.path.dirname(__file__), "samples", "password-is-test.pdf"),
+ Path(__file__).parent / "samples" / "password-is-test.pdf",
self.d3.source_path,
)
-import os
import shutil
from datetime import timedelta
from pathlib import Path
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
- mtime = os.stat(settings.MODEL_FILE).st_mtime
+ mtime = Path(settings.MODEL_FILE).stat().st_mtime
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
- mtime2 = os.stat(settings.MODEL_FILE).st_mtime
+ mtime2 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertEqual(mtime, mtime2)
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
- mtime3 = os.stat(settings.MODEL_FILE).st_mtime
+ mtime3 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertNotEqual(mtime2, mtime3)
-import os
import tempfile
from datetime import timedelta
+from pathlib import Path
from django.conf import settings
from django.contrib.auth.models import Permission
content = b"This is a test"
- with open(filename, "wb") as f:
+ with Path(filename).open("wb") as f:
f.write(content)
doc = Document.objects.create(
title="none",
- filename=os.path.basename(filename),
+ filename=Path(filename).name,
mime_type="application/pdf",
)
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self):
- os.chmod(self.dirs.data_dir, 0o000)
- os.chmod(self.dirs.media_dir, 0o000)
- os.chmod(self.dirs.consumption_dir, 0o000)
+ Path(self.dirs.data_dir).chmod(0o000)
+ Path(self.dirs.media_dir).chmod(0o000)
+ Path(self.dirs.consumption_dir).chmod(0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
-import os
+from pathlib import Path
from allauth.account import views as allauth_account_views
from allauth.mfa.base import views as allauth_mfa_views
re_path(
r"^logo(?P<path>.*)$",
serve,
- kwargs={"document_root": os.path.join(settings.MEDIA_ROOT, "logo")},
+ kwargs={"document_root": Path(settings.MEDIA_ROOT) / "logo"},
),
# allauth
path(
import abc
-import os
from email import message_from_bytes
from email import policy
from email.message import Message
+from pathlib import Path
from django.conf import settings
from gnupg import GPG
return False
if settings.EMAIL_GNUPG_HOME is None:
return True
- return os.path.isdir(settings.EMAIL_GNUPG_HOME)
+ return Path(settings.EMAIL_GNUPG_HOME).is_dir()
def run(self, message: MailMessage) -> MailMessage:
if not hasattr(message, "obj"):
# the whole text, so do not utilize it in that case
if (
sidecar_file is not None
- and os.path.isfile(sidecar_file)
+ and sidecar_file.is_file()
and self.settings.mode != "redo"
):
text = self.read_file_handle_unicode_errors(sidecar_file)
# no success with the sidecar file, try PDF
- if not os.path.isfile(pdf_file):
+ if not Path(pdf_file).is_file():
return None
try:
from ocrmypdf import SubprocessOutputError
from ocrmypdf.exceptions import DigitalSignatureError
- archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
- sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
+ archive_path = Path(self.tempdir) / "archive.pdf"
+ sidecar_file = Path(self.tempdir) / "sidecar.txt"
args = self.construct_ocrmypdf_parameters(
document_path,
f"Attempting force OCR to get the text.",
)
- archive_path_fallback = Path(
- os.path.join(self.tempdir, "archive-fallback.pdf"),
- )
- sidecar_file_fallback = Path(
- os.path.join(self.tempdir, "sidecar-fallback.txt"),
- )
+ archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf"
+ sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt"
# Attempt to run OCR with safe settings.
== "This is an DOCX test document, also made September 14, 2022"
)
assert tika_parser.archive_path is not None
- with open(tika_parser.archive_path, "rb") as f:
+ with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
in tika_parser.text
)
assert tika_parser.archive_path is not None
- with open(tika_parser.archive_path, "rb") as f:
+ with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]
def test_tika_fails_multi_part(
)
assert tika_parser.archive_path is not None
- with open(tika_parser.archive_path, "rb") as f:
+ with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]
assert tika_parser.text == "the content"
assert tika_parser.archive_path is not None
- with open(tika_parser.archive_path, "rb") as f:
+ with Path(tika_parser.archive_path).open("rb") as f:
assert f.read() == b"PDF document"
assert tika_parser.date == datetime.datetime(