From 69c5db8c21090a1f6a71211140c150024a4a189d Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Thu, 3 Jul 2025 17:02:16 +0300
Subject: [PATCH] Make the number pattern regular expression more efficient (#1213)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

I verified that all patterns parsed for importing CLDR data are parsed equivalently using the new regular expression.

The inefficient regular expression was brought to our attention by GitHub user s-sanskar – thanks!

Co-authored-by: s-sanskar
---
 babel/numbers.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/babel/numbers.py b/babel/numbers.py
index aad1583a..455ba947 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -1201,14 +1201,11 @@ def _remove_trailing_zeros_after_decimal(string: str, decimal_symbol: str) -> st
     return string
 
 
-PREFIX_END = r'[^0-9@#.,]'
-NUMBER_TOKEN = r'[0-9@#.,E+]'
-
-PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
-NUMBER_PATTERN = r"(?P<number>%s*)" % NUMBER_TOKEN
-SUFFIX_PATTERN = r"(?P<suffix>.*)"
-
-number_re = re.compile(f"{PREFIX_PATTERN}{NUMBER_PATTERN}{SUFFIX_PATTERN}")
+_number_pattern_re = re.compile(
+    r"(?P<prefix>(?:[^'0-9@#.,]|'[^']*')*)"
+    r"(?P<number>[0-9@#.,E+]*)"
+    r"(?P<suffix>.*)",
+)
 
 
 def parse_grouping(p: str) -> tuple[int, int]:
@@ -1239,7 +1236,7 @@ def parse_pattern(pattern: NumberPattern | str) -> NumberPattern:
         return pattern
 
     def _match_number(pattern):
-        rv = number_re.search(pattern)
+        rv = _number_pattern_re.search(pattern)
         if rv is None:
             raise ValueError(f"Invalid number pattern {pattern!r}")
         return rv.groups()
-- 
2.47.2