From: Serhiy Storchaka Date: Tue, 5 May 2026 08:25:22 +0000 (+0300) Subject: gh-139489: Add xml.is_valid_name() (GH-139768) X-Git-Tag: v3.15.0b1~46 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a76d9573e45dc11cb0909154fa3e68591dfab85f;p=thirdparty%2FPython%2Fcpython.git gh-139489: Add xml.is_valid_name() (GH-139768) It allows checking whether a string can be used as an element or attribute name in XML. --- diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index a47d31465b19..f9ffaa9a94aa 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -41,6 +41,21 @@ The XML handling submodules are: * :mod:`xml.sax`: SAX2 base classes and convenience functions * :mod:`xml.parsers.expat`: the Expat parser binding +This module also defines utility functions. + +.. function:: is_valid_name(name) + + Return ``True`` if the string is a valid element or attribute name, + ``False`` otherwise. + + Almost all characters are permitted in names, except control characters and + those which either are or reasonably could be used as delimiters. + Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/", + "!", "?", and "=" are forbidden. + The name cannot start with a digit or a character like "-", ".", and "·". + + .. versionadded:: next + .. _xml-security: .. _xml-vulnerabilities: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 4f83916e3b77..828bc1d1d64d 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1653,6 +1653,14 @@ wave (Contributed by Lionel Koenig and Michiel W. Beijen in :gh:`60729`.) +xml +--- + +* Add the :func:`xml.is_valid_name` function, which allows checking + whether a string can be used as an element or attribute name in XML. + (Contributed by Serhiy Storchaka in :gh:`139489`.) 
+ + xml.parsers.expat ----------------- diff --git a/Lib/test/test_xml.py b/Lib/test/test_xml.py new file mode 100644 index 000000000000..fd3633e43982 --- /dev/null +++ b/Lib/test/test_xml.py @@ -0,0 +1,27 @@ +import xml +import unittest + + +class TestUtils(unittest.TestCase): + + def test_is_valid_name(self): + is_valid_name = xml.is_valid_name + self.assertFalse(is_valid_name('')) + self.assertTrue(is_valid_name('name')) + self.assertTrue(is_valid_name('NAME')) + self.assertTrue(is_valid_name('name0:-._·')) + self.assertTrue(is_valid_name('_')) + self.assertTrue(is_valid_name(':')) + self.assertTrue(is_valid_name('Ñàḿĕ')) + self.assertTrue(is_valid_name('\U000EFFFF')) + self.assertFalse(is_valid_name('0')) + self.assertFalse(is_valid_name('-')) + self.assertFalse(is_valid_name('.')) + self.assertFalse(is_valid_name('·')) + self.assertFalse(is_valid_name('na me')) + for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000': + self.assertFalse(is_valid_name('name' + c)) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/xml/__init__.py b/Lib/xml/__init__.py index bf6d8ddfd04c..002d6d3e0e82 100644 --- a/Lib/xml/__init__.py +++ b/Lib/xml/__init__.py @@ -16,5 +16,6 @@ etree -- The ElementTree XML library. 
This is a subset of the full """ +from .utils import * -__all__ = ["dom", "parsers", "sax", "etree"] +__all__ = ["dom", "parsers", "sax", "etree", "is_valid_name"] diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py new file mode 100644 index 000000000000..c9a0b260675b --- /dev/null +++ b/Lib/xml/utils.py @@ -0,0 +1,25 @@ +lazy import re as _re + + +def is_valid_name(name): + """Test whether a string is a valid element or attribute name.""" + # https://www.w3.org/TR/xml/#NT-Name + return _re.fullmatch( + # NameStartChar + '[' + ':A-Z_a-z' + '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF' + '\u200C\u200D' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']' + # NameChar + '[' + r'\-.0-9:A-Z_a-z' + '\xB7' + '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF' + '\u200C\u200D\u203F\u2040' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']*+', + name) is not None diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst new file mode 100644 index 000000000000..05edefda053a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst @@ -0,0 +1,2 @@ +Add the :func:`xml.is_valid_name` function, which allows to check +whether a string can be used as an element or attribute name in XML.