From: XstreamGit
Date: Thu, 6 Feb 2025 19:09:35 +0000 (+0100)
Subject: Tweak: improve date matching regex for dates after numbers (#8964)
X-Git-Tag: v2.15.0-beta.rc1~126
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=046d8456e29d6e96bfd1e150b80015df0c381272;p=thirdparty%2Fpaperless-ngx.git

Tweak: improve date matching regex for dates after numbers (#8964)
---

diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index 2d73dc63f..d840817e4 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -41,7 +41,7 @@ DATE_REGEX = re.compile(
     r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\d{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
+    r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
     re.IGNORECASE,
 )