['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
- >>> re.split(r'\W+', 'Words, words, words.', 1)
+ >>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9']
.. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string.
+ .. deprecated:: 3.13
+ Passing *maxsplit* and *flags* as positional arguments is deprecated.
+ In future Python versions they will be
+ :ref:`keyword-only parameters <keyword-only_parameter>`.
+
.. function:: findall(pattern, string, flags=0)
.. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors.
-
- .. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous
non-empty match.
In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``).
+ .. deprecated:: 3.13
+ Passing *count* and *flags* as positional arguments is deprecated.
+ In future Python versions they will be
+ :ref:`keyword-only parameters <keyword-only_parameter>`.
+
.. function:: subn(pattern, repl, string, count=0, flags=0)
Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.
- .. versionchanged:: 3.1
- Added the optional flags argument.
-
- .. versionchanged:: 3.5
- Unmatched groups are replaced with an empty string.
-
.. function:: escape(pattern)
.. doctest::
:options: +NORMALIZE_WHITESPACE
- >>> [re.split(":? ", entry, 3) for entry in entries]
+ >>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
.. doctest::
:options: +NORMALIZE_WHITESPACE
- >>> [re.split(":? ", entry, 4) for entry in entries]
+ >>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],
Deprecated
----------
+* Passing the optional arguments *maxsplit*, *count* and *flags* to the
+ module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn`
+ as positional arguments is now deprecated.
+ In future Python versions these parameters will be
+ :ref:`keyword-only <keyword-only_parameter>`.
+ (Contributed by Serhiy Storchaka in :gh:`56166`.)
+
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
class _ZeroSentinel(int):
    """Private ``int`` subclass used only to build ``_zero_sentinel``."""


# Compares equal to 0, so it can be forwarded unchanged to the compiled
# pattern's methods, yet it is distinguishable by identity from a 0 that the
# caller passed explicitly.  That is how the functions below detect
# "not passed by keyword".
_zero_sentinel = _ZeroSentinel()


def _resolve_positional(funcname, required, first_name, args, first, flags):
    """Fold deprecated positional extras into ``(first, flags)``.

    Shared by sub(), subn() and split().  *args* holds the non-empty surplus
    positional arguments of *funcname*; *required* is the number of mandatory
    positional parameters; *first_name* is the name of the first optional
    parameter ('count' or 'maxsplit'); *first* and *flags* are the values
    received by keyword (``_zero_sentinel`` when absent).

    Returns the effective ``(first, flags)`` pair after emitting a
    DeprecationWarning.  Raises TypeError, with the wording the interpreter
    uses for ordinary signatures, when a value is supplied both positionally
    and by keyword or when too many positional arguments are given.
    """
    if first is not _zero_sentinel:
        raise TypeError("%s() got multiple values for argument '%s'"
                        % (funcname, first_name))
    first, *args = args
    if args:
        if flags is not _zero_sentinel:
            raise TypeError("%s() got multiple values for argument 'flags'"
                            % funcname)
        flags, *args = args
        if args:
            raise TypeError("%s() takes from %d to %d positional arguments "
                            "but %d were given"
                            % (funcname, required, required + 2,
                               required + 2 + len(args)))

    # Imported lazily so that the module does not pay for it unless the
    # deprecated calling convention is actually used.
    import warnings
    # stacklevel=3: skip this helper and the public function so the warning
    # is attributed to the code that called sub()/subn()/split().
    warnings.warn(
        "'%s' is passed as positional argument" % first_name,
        DeprecationWarning, stacklevel=3
    )
    return first, flags

def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.  repl can be either a string or a callable;
    if a string, backslash escapes in it are processed.  If it is
    a callable, it's passed the Match object and must return
    a replacement string to be used.

    count and flags should be passed by keyword; passing them
    positionally still works but emits a DeprecationWarning."""
    if args:
        count, flags = _resolve_positional("sub", 3, "count",
                                           args, count, flags)
    return _compile(pattern, flags).sub(repl, string, count)
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return a 2-tuple containing (new_string, number).
    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made.  repl can be either a string or a
    callable; if a string, backslash escapes in it are processed.
    If it is a callable, it's passed the Match object and must
    return a replacement string to be used.

    count and flags should be passed by keyword; passing them
    positionally still works but emits a DeprecationWarning."""
    if args:
        count, flags = _resolve_positional("subn", 3, "count",
                                           args, count, flags)
    return _compile(pattern, flags).subn(repl, string, count)
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.  If
    capturing parentheses are used in pattern, then the text of all
    groups in the pattern are also returned as part of the resulting
    list.  If maxsplit is nonzero, at most maxsplit splits occur,
    and the remainder of the string is returned as the final element
    of the list.

    maxsplit and flags should be passed by keyword; passing them
    positionally still works but emits a DeprecationWarning."""
    if args:
        maxsplit, flags = _resolve_positional("split", 2, "maxsplit",
                                              args, maxsplit, flags)
    return _compile(pattern, flags).split(string, maxsplit)
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
- self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
- '9.3 -3 23x99y')
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
+ '9.3 -3 23x99y')
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')
def test_qualified_re_sub(self):
    """Check re.sub() with an explicit count, positional and by keyword."""
    self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')

    # A positional count still works but must warn, and the warning must be
    # attributed to this file (the caller), not to the re module.
    with self.assertWarns(DeprecationWarning) as caught:
        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
    self.assertEqual(caught.filename, __file__)

    self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')

    # Invalid mixes of positional and keyword arguments must raise the same
    # TypeError wording as an ordinary signature would.
    rejected_calls = (
        ((1,), {'count': 1},
         r"sub\(\) got multiple values for argument 'count'"),
        ((1, 0), {'flags': 0},
         r"sub\(\) got multiple values for argument 'flags'"),
        ((1, 0, 0), {},
         r"sub\(\) takes from 3 to 5 positional arguments but 6 "
         r"were given"),
    )
    for extra_args, keywords, expected_message in rejected_calls:
        with self.assertRaisesRegex(TypeError, expected_message):
            re.sub('a', 'b', 'aaaaa', *extra_args, **keywords)
+
def test_misuse_flags(self):
    """A flag passed in the count/maxsplit slot is used as a number and warns."""
    # Each row: function, its required arguments, the name of the parameter
    # that receives the stray positional value, and the expected warning.
    cases = (
        (re.sub, ('a', 'b', 'aaaaa'), 'count',
         "'count' is passed as positional argument"),
        (re.subn, ("b*", "x", "xyz"), 'count',
         "'count' is passed as positional argument"),
        (re.split, (":", ":a:b::c"), 'maxsplit',
         "'maxsplit' is passed as positional argument"),
    )
    for func, required_args, keyword, expected_message in cases:
        with self.assertWarns(DeprecationWarning) as caught:
            positional_result = func(*required_args, re.I)
        keyword_result = func(*required_args, **{keyword: int(re.I)})
        self.assertEqual(positional_result, keyword_result)
        self.assertEqual(str(caught.warning), expected_message)
        # The warning must point at this file, i.e. at the caller.
        self.assertEqual(caught.filename, __file__)
+
def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there')
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
- self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) got multiple values for argument 'count'"):
+ re.subn('a', 'b', 'aaaaa', 1, count=1)
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) got multiple values for argument 'flags'"):
+ re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
+ with self.assertRaisesRegex(TypeError,
+ r"subn\(\) takes from 3 to 5 positional arguments but 6 "
+ r"were given"):
+ re.subn('a', 'b', 'aaaaa', 1, 0, 0)
+
def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
def test_qualified_re_split(self):
- self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+ with self.assertWarns(DeprecationWarning) as w:
+ self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+ self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', '', '', 'a:b::c'])
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) got multiple values for argument 'maxsplit'"):
+ re.split(":", ":a:b::c", 2, maxsplit=2)
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) got multiple values for argument 'flags'"):
+ re.split(":", ":a:b::c", 2, 0, flags=0)
+ with self.assertRaisesRegex(TypeError,
+ r"split\(\) takes from 2 to 4 positional arguments but 5 "
+ r"were given"):
+ re.split(":", ":a:b::c", 2, 0, 0)
+
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
for string in "a:b::c:::d", S("a:b::c:::d"):
--- /dev/null
+Deprecate passing the optional arguments *maxsplit*, *count* and *flags* to the
+module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as
+positional arguments.
+They should only be passed by keyword.