From: Aarni Koskela
Date: Mon, 4 Jan 2016 17:01:52 +0000 (+0200)
Subject: dates: add split_interval_pattern and untokenize_pattern
X-Git-Tag: 2.3.1~26^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=516113b33e027a1a11b0740ddf26eccaedec90f6;p=thirdparty%2Fbabel.git

dates: add split_interval_pattern and untokenize_pattern
---

diff --git a/babel/dates.py b/babel/dates.py
index 8f444f0e..b5c670b4 100644
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -1299,3 +1299,62 @@ def tokenize_pattern(pattern):
         append_chars()
 
     return result
+
+
+def untokenize_pattern(tokens):
+    """
+    Turn a date format pattern token stream back into a string.
+
+    This is the reverse operation of ``tokenize_pattern``.
+
+    :type tokens: Iterable[tuple]
+    :rtype: str
+    """
+    output = []
+    for tok_type, tok_value in tokens:
+        if tok_type == "field":
+            output.append(tok_value[0] * tok_value[1])
+        elif tok_type == "chars":
+            if not any(ch in PATTERN_CHARS for ch in tok_value):  # No need to quote
+                output.append(tok_value)
+            else:
+                output.append("'%s'" % tok_value.replace("'", "''"))
+    return "".join(output)
+
+
+def split_interval_pattern(pattern):
+    """
+    Split an interval-describing datetime pattern into multiple pieces.
+
+    > The pattern is then designed to be broken up into two pieces by determining the first repeating field.
+    - http://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats
+
+    >>> split_interval_pattern(u'E d.M. \u2013 E d.M.')
+    [u'E d.M. \u2013 ', 'E d.M.']
+    >>> split_interval_pattern("Y 'text' Y 'more text'")
+    ["Y 'text '", "Y 'more text'"]
+    >>> split_interval_pattern(u"E, MMM d \u2013 E")
+    [u'E, MMM d \u2013 ', u'E']
+    >>> split_interval_pattern("MMM d")
+    ['MMM d']
+    >>> split_interval_pattern("y G")
+    ['y G']
+    >>> split_interval_pattern(u"MMM d \u2013 d")
+    [u'MMM d \u2013 ', u'd']
+
+    :param pattern: Interval pattern string
+    :return: list of "subpatterns"
+    """
+
+    seen_fields = set()
+    parts = [[]]
+
+    for tok_type, tok_value in tokenize_pattern(pattern):
+        if tok_type == "field":
+            if tok_value[0] in seen_fields:  # Repeated field
+                parts.append([])
+                seen_fields.clear()
+            seen_fields.add(tok_value[0])
+        parts[-1].append((tok_type, tok_value))
+
+    return [untokenize_pattern(tokens) for tokens in parts]
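
A sketch of the round trip the new untokenize_pattern enables, assuming the
tokenize_pattern and PATTERN_CHARS already present in babel/dates.py (the
snippet is illustrative, not part of the patch):

    from babel.dates import tokenize_pattern, untokenize_pattern

    # tokenize_pattern() emits ("field", (char, count)) and ("chars", text)
    # tuples; untokenize_pattern() folds them back into a pattern string.
    tokens = tokenize_pattern("MMM d, yyyy")
    # expected: [('field', ('M', 3)), ('chars', ' '), ('field', ('d', 1)),
    #            ('chars', ', '), ('field', ('y', 4))]
    assert untokenize_pattern(tokens) == "MMM d, yyyy"

    # Literal text containing pattern letters is re-quoted on the way out,
    # with embedded apostrophes doubled per CLDR quoting rules:
    assert untokenize_pattern([("chars", "o'clock")]) == "'o''clock'"

As the "Y 'text' Y 'more text'" doctest shows, the round trip normalizes
quoting rather than preserving it byte-for-byte, so equality with the input
pattern is not guaranteed in general.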
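
split_interval_pattern is meant to feed interval formatting per TR35's "first
repeating field" rule; pairing it with format_date as below is an assumed
usage for illustration, not code from this commit:

    from datetime import date
    from babel.dates import format_date, split_interval_pattern

    # u"E d.M. \u2013 E d.M." splits at the second E into a start piece
    # (en dash included) and an end piece.
    start_pat, end_pat = split_interval_pattern(u"E d.M. \u2013 E d.M.")

    # Render each subpattern with the matching endpoint of the interval.
    text = (format_date(date(2016, 1, 4), start_pat, locale="fi")
            + format_date(date(2016, 1, 8), end_pat, locale="fi"))
    # roughly u"ma 4.1. \u2013 pe 8.1." (exact output depends on CLDR data)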