git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Ensure the tika parse function gets a string, not a PathLike [1591/head]
author Trenton Holmes <holmes.trenton@gmail.com>
Wed, 14 Sep 2022 14:48:12 +0000 (07:48 -0700)
committer Trenton Holmes <holmes.trenton@gmail.com>
Wed, 14 Sep 2022 14:48:12 +0000 (07:48 -0700)
src/paperless_tika/parsers.py

index e706e3aa5f320dd82a29c8fa369eddc2decfb02d..1cfb1eecbb8832ee4421ac61670e0f953d8e63ac 100644 (file)
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 import dateutil.parser
 import requests
@@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser):
 
     def extract_metadata(self, document_path, mime_type):
         tika_server = settings.TIKA_ENDPOINT
+
+        # tika does not support a PathLike, only strings
+        # ensure this is a string
+        document_path = str(document_path)
+
         try:
             parsed = parser.from_file(document_path, tika_server)
         except Exception as e:
@@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser):
             for key in parsed["metadata"]
         ]
 
-    def parse(self, document_path, mime_type, file_name=None):
+    def parse(self, document_path: Path, mime_type, file_name=None):
         self.log("info", f"Sending {document_path} to Tika server")
         tika_server = settings.TIKA_ENDPOINT
 
+        # tika does not support a PathLike, only strings
+        # ensure this is a string
+        document_path = str(document_path)
+
         try:
             parsed = parser.from_file(document_path, tika_server)
         except Exception as err: