]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-139489: Add xml.is_valid_name() (GH-139768)
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 5 May 2026 08:25:22 +0000 (11:25 +0300)
committer GitHub <noreply@github.com>
Tue, 5 May 2026 08:25:22 +0000 (11:25 +0300)
It allows checking whether a string can be used as an element or attribute
name in XML.

Doc/library/xml.rst
Doc/whatsnew/3.15.rst
Lib/test/test_xml.py [new file with mode: 0644]
Lib/xml/__init__.py
Lib/xml/utils.py [new file with mode: 0644]
Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst [new file with mode: 0644]

index a47d31465b19f128c972cc4cfa01d01f20744a5a..f9ffaa9a94aacc2360b253c460a5b322904912fb 100644 (file)
@@ -41,6 +41,21 @@ The XML handling submodules are:
 * :mod:`xml.sax`: SAX2 base classes and convenience functions
 * :mod:`xml.parsers.expat`: the Expat parser binding
 
+This module also defines utility functions.
+
+.. function:: is_valid_name(name)
+
+   Return ``True`` if the string is a valid element or attribute name,
+   ``False`` otherwise.
+
+   Almost all characters are permitted in names, except control characters and
+   those which either are or reasonably could be used as delimiters.
+   Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/",
+   "!", "?", and "=" are forbidden.
+   The name cannot start with a digit or a character like "-", ".", or "·".
+
+   .. versionadded:: next
+
 
 .. _xml-security:
 .. _xml-vulnerabilities:
index 4f83916e3b77c2b94be522352e18d87504dd601f..828bc1d1d64d0a2a97a4b0e798bf85700cde35b4 100644 (file)
@@ -1653,6 +1653,14 @@ wave
 (Contributed by Lionel Koenig and Michiel W. Beijen in :gh:`60729`.)
 
 
+xml
+---
+
+* Add the :func:`xml.is_valid_name` function, which checks
+  whether a string can be used as an element or attribute name in XML.
+  (Contributed by Serhiy Storchaka in :gh:`139489`.)
+
+
 xml.parsers.expat
 -----------------
 
diff --git a/Lib/test/test_xml.py b/Lib/test/test_xml.py
new file mode 100644 (file)
index 0000000..fd3633e
--- /dev/null
@@ -0,0 +1,27 @@
+import xml
+import unittest
+
+
+class TestUtils(unittest.TestCase):
+
+    def test_is_valid_name(self):
+        is_valid_name = xml.is_valid_name
+        self.assertFalse(is_valid_name(''))  # empty string
+        self.assertTrue(is_valid_name('name'))
+        self.assertTrue(is_valid_name('NAME'))
+        self.assertTrue(is_valid_name('name0:-._·'))  # OK after 1st char
+        self.assertTrue(is_valid_name('_'))
+        self.assertTrue(is_valid_name(':'))
+        self.assertTrue(is_valid_name('Ñàḿĕ'))  # non-ASCII letters
+        self.assertTrue(is_valid_name('\U000EFFFF'))
+        self.assertFalse(is_valid_name('0'))  # cannot be the first character
+        self.assertFalse(is_valid_name('-'))  # cannot be the first character
+        self.assertFalse(is_valid_name('.'))  # cannot be the first character
+        self.assertFalse(is_valid_name('·'))  # cannot be the first character
+        self.assertFalse(is_valid_name('na me'))  # embedded space
+        for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000':
+            self.assertFalse(is_valid_name('name' + c))  # invalid anywhere
+
+
+if __name__ == '__main__':
+    unittest.main()
index bf6d8ddfd04c934003004472d7f03bfa3f06b5f5..002d6d3e0e8267c4e5939f0cd35bc50fdb9c6a48 100644 (file)
@@ -16,5 +16,6 @@ etree -- The ElementTree XML library.  This is a subset of the full
 
 """
 
+from .utils import *
 
-__all__ = ["dom", "parsers", "sax", "etree"]
+__all__ = ["dom", "parsers", "sax", "etree", "is_valid_name"]
diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py
new file mode 100644 (file)
index 0000000..c9a0b26
--- /dev/null
@@ -0,0 +1,25 @@
+lazy import re as _re
+
+
+def is_valid_name(name):
+    """Test whether a string is a valid element or attribute name."""
+    # Pattern follows the Name production: https://www.w3.org/TR/xml/#NT-Name
+    return _re.fullmatch(
+        # NameStartChar: exactly one character allowed in the first position
+        '['
+            ':A-Z_a-z'
+            '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF'
+            '\u200C\u200D'
+            '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
+            '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
+        ']'
+        # NameChar: zero or more further characters (adds "-", ".", 0-9, etc.)
+        '['
+            r'\-.0-9:A-Z_a-z'
+            '\xB7'
+            '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF'
+            '\u200C\u200D\u203F\u2040'
+            '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
+            '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
+        ']*+',  # possessive quantifier: matched characters are never given back
+        name) is not None  # fullmatch() yields None unless the whole string fits
diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst
new file mode 100644 (file)
index 0000000..05edefd
--- /dev/null
@@ -0,0 +1,2 @@
+Add the :func:`xml.is_valid_name` function, which checks
+whether a string can be used as an element or attribute name in XML.