git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: Test metadata items for Unicode issues (#5707)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Fri, 9 Feb 2024 20:08:23 +0000 (12:08 -0800)
committer GitHub <noreply@github.com>
Fri, 9 Feb 2024 20:08:23 +0000 (20:08 +0000)
Test each key for Unicode issues and reject ones which will fail inside DRF.

src/paperless_mail/parsers.py
src/paperless_tesseract/parsers.py

index 92fb90bb151f201db77f8f12136eef2493afb23a..b8cf129803f9139ccc4a4e769aa7b148e37c2c7a 100644 (file)
@@ -69,6 +69,11 @@ class MailDocumentParser(DocumentParser):
 
         for key, value in mail.headers.items():
             value = ", ".join(i for i in value)
+            try:
+                value.encode("utf-8")
+            except UnicodeEncodeError as e:  # pragma: no cover
+                self.log.debug(f"Skipping header {key}: {e}")
+                continue
 
             result.append(
                 {
index b6baa32893df1af3db6006728531a37d988ca6fa..09086585eff9f94b4ba2d60952e49d0cf5578372 100644 (file)
@@ -55,11 +55,21 @@ class RasterisedDocumentParser(DocumentParser):
                 value = str(value)
                 try:
                     m = namespace_pattern.match(key)
+                    if m is None:  # pragma: no cover
+                        continue
+                    namespace = m.group(1)
+                    key_value = m.group(2)
+                    try:
+                        namespace.encode("utf-8")
+                        key_value.encode("utf-8")
+                    except UnicodeEncodeError as e:  # pragma: no cover
+                        self.log.debug(f"Skipping metadata key {key}: {e}")
+                        continue
                     result.append(
                         {
-                            "namespace": m.group(1),
-                            "prefix": meta.REVERSE_NS[m.group(1)],
-                            "key": m.group(2),
+                            "namespace": namespace,
+                            "prefix": meta.REVERSE_NS[namespace],
+                            "key": key_value,
                             "value": value,
                         },
                     )