import os
+from pathlib import Path
import dateutil.parser
import requests
def extract_metadata(self, document_path, mime_type):
tika_server = settings.TIKA_ENDPOINT
+
+ # tika does not support PathLike objects, only strings,
+ # so make sure the document path is a string
+ document_path = str(document_path)
+
try:
parsed = parser.from_file(document_path, tika_server)
except Exception as e:
for key in parsed["metadata"]
]
- def parse(self, document_path, mime_type, file_name=None):
+ def parse(self, document_path: Path, mime_type, file_name=None):
self.log("info", f"Sending {document_path} to Tika server")
tika_server = settings.TIKA_ENDPOINT
+ # tika does not support PathLike objects, only strings,
+ # so make sure the document path is a string
+ document_path = str(document_path)
+
try:
parsed = parser.from_file(document_path, tika_server)
except Exception as err: