From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Thu, 25 Jan 2024 21:29:22 +0000 (-0800)
Subject: Fix: Modify one of date regexes (#5540)
X-Git-Tag: v2.4.2~1^2~2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ba0f4718e516fb220ccccad6a7ae5a189792cb29;p=thirdparty%2Fpaperless-ngx.git

Fix: Modify one of date regexes (#5540)
---

diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index db4b42792c..12e5d6b333 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -38,7 +38,7 @@ from documents.utils import copy_file_with_basic_stats
 DATE_REGEX = re.compile(
     r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"
+    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} [0-9]{4}|[a-zA-Z]{3,9} [0-9]{1,2}, [0-9]{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([0-9]{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-][0-9]{4})(\b|(?=([_-])))|"
diff --git a/src/documents/tests/test_date_parsing.py b/src/documents/tests/test_date_parsing.py
index 54b4d7b532..d4ea71be55 100644
--- a/src/documents/tests/test_date_parsing.py
+++ b/src/documents/tests/test_date_parsing.py
@@ -201,6 +201,13 @@ class TestDate(TestCase):
             datetime.datetime(2022, 3, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
         )
 
+    def test_date_format_26(self):
+        text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051"
+        self.assertEqual(
+            parse_date("", text),
+            datetime.datetime(2019, 9, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
+        )
+
     def test_crazy_date_past(self, *args):
         self.assertIsNone(parse_date("", "01-07-0590 00:00:00"))
 