git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: Modify one of date regexes (#5540)
author shamoon <4887959+shamoon@users.noreply.github.com>
Thu, 25 Jan 2024 21:29:22 +0000 (13:29 -0800)
committer GitHub <noreply@github.com>
Thu, 25 Jan 2024 21:29:22 +0000 (21:29 +0000)
src/documents/parsers.py
src/documents/tests/test_date_parsing.py

index db4b42792cd11b2ae38e388568baa7ac88500edf..12e5d6b333f4372c43b1a6c184517b7cd9a2f46c 100644 (file)
@@ -38,7 +38,7 @@ from documents.utils import copy_file_with_basic_stats
 DATE_REGEX = re.compile(
     r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"
+    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} [0-9]{4}|[a-zA-Z]{3,9} [0-9]{1,2}, [0-9]{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([0-9]{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-][0-9]{4})(\b|(?=([_-])))|"
index 54b4d7b5325594353ee323d3eacbe3a743a0337d..d4ea71be55f30d0203654bd6e14520c93e51157d 100644 (file)
@@ -201,6 +201,13 @@ class TestDate(TestCase):
             datetime.datetime(2022, 3, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
         )
 
+    def test_date_format_26(self):
+        text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051"
+        self.assertEqual(
+            parse_date("", text),
+            datetime.datetime(2019, 9, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
+        )
+
     def test_crazy_date_past(self, *args):
         self.assertIsNone(parse_date("", "01-07-0590 00:00:00"))