git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Chore: switch from os.path to pathlib.Path (#9933)
author Sebastian Steinbeißer <33968289+gothicVI@users.noreply.github.com>
Wed, 18 Jun 2025 17:16:59 +0000 (19:16 +0200)
committer GitHub <noreply@github.com>
Wed, 18 Jun 2025 17:16:59 +0000 (17:16 +0000)
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
pyproject.toml
src/documents/views.py
src/paperless/checks.py
src/paperless/tests/test_checks.py
src/paperless_mail/mail.py
src/paperless_tesseract/tests/test_parser.py

diff --git a/pyproject.toml b/pyproject.toml
index 4e0d5edb2fd977074c207e14f6dda9eae8da7c18..04b9bd6fe5a4d756abbc87d5ed64e558c4fc4dba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -221,22 +221,12 @@ lint.per-file-ignores."src/documents/parsers.py" = [
 lint.per-file-ignores."src/documents/signals/handlers.py" = [
   "PTH",
 ] # TODO Enable & remove
-lint.per-file-ignores."src/documents/views.py" = [
-  "PTH",
-] # TODO Enable & remove
-lint.per-file-ignores."src/paperless/checks.py" = [
-  "PTH",
-] # TODO Enable & remove
 lint.per-file-ignores."src/paperless/settings.py" = [
   "PTH",
 ] # TODO Enable & remove
-lint.per-file-ignores."src/paperless_mail/mail.py" = [
-  "PTH",
-] # TODO Enable & remove
 lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
-  "PTH",
   "RUF001",
-] # TODO PTH Enable & remove
+]
 lint.isort.force-single-line = true
 
 [tool.pytest.ini_options]
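diff --git a/src/documents/views.py b/src/documents/views.py
index 1b9b3522630aca9fd2fcb0af11c4317567a1a516..f49651eb4c60abcb117e85d4de04f9b05d12819a 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py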
@@ -650,7 +650,7 @@ class DocumentViewSet(
         )
 
     def get_metadata(self, file, mime_type):
-        if not os.path.isfile(file):
+        if not Path(file).is_file():
             return None
 
         parser_class = get_parser_class_for_mime_type(mime_type)
@@ -668,8 +668,8 @@ class DocumentViewSet(
             return []
 
     def get_filesize(self, filename):
-        if os.path.isfile(filename):
-            return os.stat(filename).st_size
+        if Path(filename).is_file():
+            return Path(filename).stat().st_size
         else:
             return None
 
@@ -1215,31 +1215,37 @@ class UnifiedSearchViewSet(DocumentViewSet):
 class LogViewSet(ViewSet):
     permission_classes = (IsAuthenticated, PaperlessAdminPermissions)
 
-    log_files = ["paperless", "mail", "celery"]
+    ALLOWED_LOG_FILES = {
+        "paperless": "paperless.log",
+        "mail": "mail.log",
+        "celery": "celery.log",
+    }
 
-    def get_log_filename(self, log):
-        return os.path.join(settings.LOGGING_DIR, f"{log}.log")
+    def get_log_file(self, log_key: str) -> Path:
+        return Path(settings.LOGGING_DIR) / self.ALLOWED_LOG_FILES[log_key]
 
     def retrieve(self, request, *args, **kwargs):
-        log_file = kwargs.get("pk")
-        if log_file not in self.log_files:
+        log_key = kwargs.get("pk")
+        if log_key not in self.ALLOWED_LOG_FILES:
             raise Http404
 
-        filename = self.get_log_filename(log_file)
+        log_file = self.get_log_file(log_key)
 
-        if not os.path.isfile(filename):
+        if not log_file.is_file():
             raise Http404
 
-        with open(filename) as f:
+        with log_file.open() as f:
             lines = [line.rstrip() for line in f.readlines()]
 
         return Response(lines)
 
     def list(self, request, *args, **kwargs):
-        exist = [
-            log for log in self.log_files if os.path.isfile(self.get_log_filename(log))
+        existing_logs = [
+            log_key
+            for log_key in self.ALLOWED_LOG_FILES
+            if self.get_log_file(log_key).is_file()
         ]
-        return Response(exist)
+        return Response(existing_logs)
 
 
 class SavedViewViewSet(ModelViewSet, PassUserMixin):
@@ -2073,7 +2079,7 @@ class BulkDownloadView(GenericAPIView):
                 strategy.add_document(document)
 
         # TODO(stumpylog): Investigate using FileResponse here
-        with open(temp.name, "rb") as f:
+        with Path(temp.name).open("rb") as f:
             response = HttpResponse(f, content_type="application/zip")
             response["Content-Disposition"] = '{}; filename="{}"'.format(
                 "attachment",
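diff --git a/src/paperless/checks.py b/src/paperless/checks.py
index 5ac4db7ff0c5b2532e2c7a1b5fd3181a3e94f346..75e466d061f623d6d2d6ced859b10b219dd3b35b 100644
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py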
@@ -3,6 +3,7 @@ import os
 import pwd
 import shutil
 import stat
+from pathlib import Path
 
 from django.conf import settings
 from django.core.checks import Error
@@ -19,26 +20,23 @@ writeable_hint = (
 )
 
 
-def path_check(var, directory):
-    messages = []
+def path_check(var, directory: Path) -> list[Error]:
+    messages: list[Error] = []
     if directory:
-        if not os.path.isdir(directory):
+        if not directory.is_dir():
             messages.append(
                 Error(exists_message.format(var), exists_hint.format(directory)),
             )
         else:
-            test_file = os.path.join(
-                directory,
-                f"__paperless_write_test_{os.getpid()}__",
-            )
+            test_file: Path = directory / f"__paperless_write_test_{os.getpid()}__"
             try:
-                with open(test_file, "w"):
+                with test_file.open("w"):
                     pass
             except PermissionError:
-                dir_stat = os.stat(directory)
-                dir_mode = stat.filemode(dir_stat.st_mode)
-                dir_owner = pwd.getpwuid(dir_stat.st_uid).pw_name
-                dir_group = grp.getgrgid(dir_stat.st_gid).gr_name
+                dir_stat: os.stat_result = Path(directory).stat()
+                dir_mode: str = stat.filemode(dir_stat.st_mode)
+                dir_owner: str = pwd.getpwuid(dir_stat.st_uid).pw_name
+                dir_group: str = grp.getgrgid(dir_stat.st_gid).gr_name
                 messages.append(
                     Error(
                         writeable_message.format(var),
@@ -48,14 +46,18 @@ def path_check(var, directory):
                     ),
                 )
             finally:
-                if os.path.isfile(test_file):
-                    os.remove(test_file)
+                try:
+                    if test_file.is_file():
+                        test_file.unlink()
+                except (PermissionError, OSError):
+                    # Skip cleanup if we can't access the file — expected in permission tests
+                    pass
 
     return messages
 
 
 @register()
-def paths_check(app_configs, **kwargs):
+def paths_check(app_configs, **kwargs) -> list[Error]:
     """
     Check the various paths for existence, readability and writeability
     """
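diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py
index a064a0d59f5a1efa47d03b1aaddc5aa1862d5cd7..f1909112bef2d9100f164ee801e44a39a0434561 100644
--- a/src/paperless/tests/test_checks.py
+++ b/src/paperless/tests/test_checks.py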
@@ -27,9 +27,9 @@ class TestChecks(DirectoriesMixin, TestCase):
         self.assertEqual(paths_check(None), [])
 
     @override_settings(
-        MEDIA_ROOT="uuh",
-        DATA_DIR="whatever",
-        CONSUMPTION_DIR="idontcare",
+        MEDIA_ROOT=Path("uuh"),
+        DATA_DIR=Path("whatever"),
+        CONSUMPTION_DIR=Path("idontcare"),
     )
     def test_paths_check_dont_exist(self):
         msgs = paths_check(None)
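diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py
index 3d020da26baf28c7484d2fafb19b1c3a34c4168f..b62e37166a0fc5f2635722cf8f8503300bd13f80 100644
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py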
@@ -1,7 +1,6 @@
 import datetime
 import itertools
 import logging
-import os
 import ssl
 import tempfile
 import traceback
@@ -484,7 +483,7 @@ class MailAccountHandler(LoggingMixin):
             return message.subject
 
         elif rule.assign_title_from == MailRule.TitleSource.FROM_FILENAME:
-            return os.path.splitext(os.path.basename(att.filename))[0]
+            return Path(att.filename).stem
 
         elif rule.assign_title_from == MailRule.TitleSource.NONE:
             return None
@@ -908,7 +907,7 @@ class MailAccountHandler(LoggingMixin):
             dir=settings.SCRATCH_DIR,
             suffix=".eml",
         )
-        with open(temp_filename, "wb") as f:
+        with Path(temp_filename).open("wb") as f:
             # Move "From"-header to beginning of file
             # TODO: This ugly workaround is needed because the parser is
             #   chosen only by the mime_type detected via magic
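diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index 514b7163ffa9fd9a6c826fae2dc5830dce730f05..09fc323aed3dadbe486c8ce9f75ad9507e9828bc 100644
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py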
@@ -1,4 +1,3 @@
-import os
 import shutil
 import tempfile
 import uuid
@@ -70,13 +69,13 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(uuid.uuid4())
         page_count = parser.get_page_count(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertEqual(page_count, 1)
 
         page_count = parser.get_page_count(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
             "application/pdf",
         )
         self.assertEqual(page_count, 6)
@@ -93,7 +92,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(uuid.uuid4())
         with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm:
             page_count = parser.get_page_count(
-                os.path.join(self.SAMPLE_FILES, "password-protected.pdf"),
+                (self.SAMPLE_FILES / "password-protected.pdf").as_posix(),
                 "application/pdf",
             )
             self.assertEqual(page_count, None)
@@ -102,7 +101,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_thumbnail(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(thumb)
@@ -119,7 +118,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(thumb)
@@ -127,7 +126,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_thumbnail_encrypted(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            (self.SAMPLE_FILES / "encrypted.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(thumb)
@@ -135,17 +134,17 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_get_dpi(self):
         parser = RasterisedDocumentParser(None)
 
-        dpi = parser.get_dpi(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"))
+        dpi = parser.get_dpi((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix())
         self.assertEqual(dpi, None)
 
-        dpi = parser.get_dpi(os.path.join(self.SAMPLE_FILES, "simple.png"))
+        dpi = parser.get_dpi((self.SAMPLE_FILES / "simple.png").as_posix())
         self.assertEqual(dpi, 72)
 
     def test_simple_digital(self):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -157,7 +156,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            (self.SAMPLE_FILES / "with-form.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -173,7 +172,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            (self.SAMPLE_FILES / "with-form.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -187,7 +186,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_signed(self):
         parser = RasterisedDocumentParser(None)
 
-        parser.parse(os.path.join(self.SAMPLE_FILES, "signed.pdf"), "application/pdf")
+        parser.parse((self.SAMPLE_FILES / "signed.pdf").as_posix(), "application/pdf")
 
         self.assertIsNone(parser.archive_path)
         self.assertContainsStrings(
@@ -203,7 +202,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            (self.SAMPLE_FILES / "encrypted.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -214,7 +213,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_with_form_error_notext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            (self.SAMPLE_FILES / "with-form.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -228,7 +227,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            (self.SAMPLE_FILES / "with-form.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -240,7 +239,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_image_simple(self):
         parser = RasterisedDocumentParser(None)
 
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.png"), "image/png")
+        parser.parse((self.SAMPLE_FILES / "simple.png").as_posix(), "image/png")
 
         self.assertIsFile(parser.archive_path)
 
@@ -252,11 +251,11 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         with tempfile.TemporaryDirectory() as tempdir:
             # Copy sample file to temp directory, as the parsing changes the file
             # and this makes it modified to Git
-            sample_file = os.path.join(self.SAMPLE_FILES, "simple-alpha.png")
-            dest_file = os.path.join(tempdir, "simple-alpha.png")
+            sample_file = self.SAMPLE_FILES / "simple-alpha.png"
+            dest_file = Path(tempdir) / "simple-alpha.png"
             shutil.copy(sample_file, dest_file)
 
-            parser.parse(dest_file, "image/png")
+            parser.parse(dest_file.as_posix(), "image/png")
 
             self.assertIsFile(parser.archive_path)
 
@@ -266,7 +265,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         dpi = parser.calculate_a4_dpi(
-            os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
+            (self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(),
         )
 
         self.assertEqual(dpi, 62)
@@ -278,7 +277,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 
         def f():
             parser.parse(
-                os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
+                (self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(),
                 "image/png",
             )
 
@@ -288,7 +287,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_image_no_dpi_default(self):
         parser = RasterisedDocumentParser(None)
 
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png")
+        parser.parse((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), "image/png")
 
         self.assertIsFile(parser.archive_path)
 
@@ -300,7 +299,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_multi_page(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -313,7 +312,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_multi_page_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -326,7 +325,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_multi_page_pages_redo(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -339,7 +338,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_multi_page_pages_force(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -352,7 +351,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     def test_multi_page_analog_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -376,7 +375,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -398,7 +397,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsFile(parser.archive_path)
@@ -420,7 +419,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
@@ -443,7 +442,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -468,7 +467,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNotNone(parser.archive_path)
@@ -491,7 +490,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNotNone(parser.archive_path)
@@ -514,7 +513,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
@@ -537,7 +536,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNotNone(parser.archive_path)
@@ -560,7 +559,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
@@ -583,7 +582,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
@@ -606,7 +605,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNotNone(parser.archive_path)
@@ -616,7 +615,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             ["page 1", "page 2", "page 3", "page 4", "page 5", "page 6"],
         )
 
-        with open(os.path.join(parser.tempdir, "sidecar.txt")) as f:
+        with (parser.tempdir / "sidecar.txt").open() as f:
             sidecar = f.read()
 
         self.assertIn("[OCR skipped on page(s) 4-6]", sidecar)
@@ -637,7 +636,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "single-page-mixed.pdf"),
+            (self.SAMPLE_FILES / "single-page-mixed.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNotNone(parser.archive_path)
@@ -651,7 +650,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             ],
         )
 
-        with open(os.path.join(parser.tempdir, "sidecar.txt")) as f:
+        with (parser.tempdir / "sidecar.txt").open() as f:
             sidecar = f.read().lower()
 
         self.assertIn("this is some text, but in an image, also on page 1.", sidecar)
@@ -674,7 +673,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
             "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
@@ -686,7 +685,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
     def test_rotate(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "rotated.pdf"), "application/pdf")
+        parser.parse((self.SAMPLE_FILES / "rotated.pdf").as_posix(), "application/pdf")
         self.assertContainsStrings(
             parser.get_text(),
             [
@@ -708,7 +707,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.tiff"),
+            (self.SAMPLE_FILES / "multi-page-images.tiff").as_posix(),
             "image/tiff",
         )
         self.assertIsFile(parser.archive_path)
@@ -728,7 +727,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             - Text from all pages extracted
         """
         parser = RasterisedDocumentParser(None)
-        sample_file = os.path.join(self.SAMPLE_FILES, "multi-page-images-alpha.tiff")
+        sample_file = self.SAMPLE_FILES / "multi-page-images-alpha.tiff"
         with tempfile.NamedTemporaryFile() as tmp_file:
             shutil.copy(sample_file, tmp_file.name)
             parser.parse(
@@ -753,10 +752,9 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             - Text from all pages extracted
         """
         parser = RasterisedDocumentParser(None)
-        sample_file = os.path.join(
-            self.SAMPLE_FILES,
-            "multi-page-images-alpha-rgb.tiff",
-        )
+        sample_file = (
+            self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff"
+        ).as_posix()
         with tempfile.NamedTemporaryFile() as tmp_file:
             shutil.copy(sample_file, tmp_file.name)
             parser.parse(
@@ -845,7 +843,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "rtl-test.pdf"),
+            (self.SAMPLE_FILES / "rtl-test.pdf").as_posix(),
             "application/pdf",
         )
 
@@ -860,49 +858,52 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         self.assertRaises(
             ParseError,
             parser.parse,
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
             "application/pdf",
         )
 
 
 class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
-    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
+    SAMPLE_FILES = Path(__file__).parent / "samples"
 
     def test_bmp(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.bmp"), "image/bmp")
+        parser.parse((self.SAMPLE_FILES / "simple.bmp").as_posix(), "image/bmp")
         self.assertIsFile(parser.archive_path)
         self.assertIn("this is a test document", parser.get_text().lower())
 
     def test_jpg(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.jpg"), "image/jpeg")
+        parser.parse((self.SAMPLE_FILES / "simple.jpg").as_posix(), "image/jpeg")
         self.assertIsFile(parser.archive_path)
         self.assertIn("this is a test document", parser.get_text().lower())
 
     def test_heic(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.heic"), "image/heic")
+        parser.parse((self.SAMPLE_FILES / "simple.heic").as_posix(), "image/heic")
         self.assertIsFile(parser.archive_path)
         self.assertIn("pizza", parser.get_text().lower())
 
     @override_settings(OCR_IMAGE_DPI=200)
     def test_gif(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.gif"), "image/gif")
+        parser.parse((self.SAMPLE_FILES / "simple.gif").as_posix(), "image/gif")
         self.assertIsFile(parser.archive_path)
         self.assertIn("this is a test document", parser.get_text().lower())
 
     def test_tiff(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.tif"), "image/tiff")
+        parser.parse((self.SAMPLE_FILES / "simple.tif").as_posix(), "image/tiff")
         self.assertIsFile(parser.archive_path)
         self.assertIn("this is a test document", parser.get_text().lower())
 
     @override_settings(OCR_IMAGE_DPI=72)
     def test_webp(self):
         parser = RasterisedDocumentParser(None)
-        parser.parse(os.path.join(self.SAMPLE_FILES, "document.webp"), "image/webp")
+        parser.parse(
+            (self.SAMPLE_FILES / "document.webp").as_posix(),
+            "image/webp",
+        )
         self.assertIsFile(parser.archive_path)
         # Older tesseracts consistently mangle the space between "a webp",
         # tesseract 5.3.0 seems to do a better job, so we're accepting both