git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Tweak: improve date matching regex for dates after numbers (#8964)
author XstreamGit <mail@gregorgruber.de>
Thu, 6 Feb 2025 19:09:35 +0000 (20:09 +0100)
committer GitHub <noreply@github.com>
Thu, 6 Feb 2025 19:09:35 +0000 (19:09 +0000)
src/documents/parsers.py

index 2d73dc63ffc4618933da41ac0300361b5b491113..d840817e4f5ba8091a9695205246ac1c0c74de9e 100644 (file)
@@ -41,7 +41,7 @@ DATE_REGEX = re.compile(
     r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\d{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
+    r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
     r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
     re.IGNORECASE,
 )