git.ipfire.org Git - thirdparty/babel.git/commitdiff
Make the number pattern regular expression more efficient (#1213)
author: Aarni Koskela <akx@iki.fi>
Thu, 3 Jul 2025 14:02:16 +0000 (17:02 +0300)
committer: GitHub <noreply@github.com>
Thu, 3 Jul 2025 14:02:16 +0000 (17:02 +0300)
I verified that all patterns parsed for importing CLDR data are parsed equivalently using the new regular expression.

The inefficient regular expression was brought to our attention by GitHub user s-sanskar – thanks!

Co-authored-by: s-sanskar <sanskarpok11@gmail.com>
babel/numbers.py

index aad1583a139cdfc5fcdc8a0e9997dad0b11b2dea..455ba9471097d6c5af682e3e461b1f8a7ab5741e 100644 (file)
@@ -1201,14 +1201,11 @@ def _remove_trailing_zeros_after_decimal(string: str, decimal_symbol: str) -> st
     return string
 
 
-PREFIX_END = r'[^0-9@#.,]'
-NUMBER_TOKEN = r'[0-9@#.,E+]'
-
-PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
-NUMBER_PATTERN = r"(?P<number>%s*)" % NUMBER_TOKEN
-SUFFIX_PATTERN = r"(?P<suffix>.*)"
-
-number_re = re.compile(f"{PREFIX_PATTERN}{NUMBER_PATTERN}{SUFFIX_PATTERN}")
+_number_pattern_re = re.compile(
+    r"(?P<prefix>(?:[^'0-9@#.,]|'[^']*')*)"
+    r"(?P<number>[0-9@#.,E+]*)"
+    r"(?P<suffix>.*)",
+)
 
 
 def parse_grouping(p: str) -> tuple[int, int]:
@@ -1239,7 +1236,7 @@ def parse_pattern(pattern: NumberPattern | str) -> NumberPattern:
         return pattern
 
     def _match_number(pattern):
-        rv = number_re.search(pattern)
+        rv = _number_pattern_re.search(pattern)
         if rv is None:
             raise ValueError(f"Invalid number pattern {pattern!r}")
         return rv.groups()