From: Trenton Holmes Date: Wed, 14 Sep 2022 14:48:12 +0000 (-0700) Subject: Ensure the tika parse function gets a string, not a PathLike X-Git-Tag: v1.9.0-beta.rc3~7^2 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F1591%2Fhead;p=thirdparty%2Fpaperless-ngx.git Ensure the tika parse function gets a string, not a PathLike --- diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index e706e3aa5f..1cfb1eecbb 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import dateutil.parser import requests @@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser): def extract_metadata(self, document_path, mime_type): tika_server = settings.TIKA_ENDPOINT + + # tika does not support a PathLike, only strings + # ensure this is a string + document_path = str(document_path) + try: parsed = parser.from_file(document_path, tika_server) except Exception as e: @@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser): for key in parsed["metadata"] ] - def parse(self, document_path, mime_type, file_name=None): + def parse(self, document_path: Path, mime_type, file_name=None): self.log("info", f"Sending {document_path} to Tika server") tika_server = settings.TIKA_ENDPOINT + # tika does not support a PathLike, only strings + # ensure this is a string + document_path = str(document_path) + try: parsed = parser.from_file(document_path, tika_server) except Exception as err: