maliciously constructed data. If you need to parse untrusted or
unauthenticated data see :ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default to
+ increase security by default. To enable processing of external entities,
+ pass a custom parser instance in::
+
+ from xml.dom.pulldom import parse
+ from xml.sax import make_parser
+ from xml.sax.handler import feature_external_ges
+
+ parser = make_parser()
+ parser.setFeature(feature_external_ges, True)
+ parse(filename, parser=parser)
+
Example::
========================= ============== =============== ============== ============== ==============
billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3)
-`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe
+external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3)
+`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe
decompression bomb Safe Safe Safe Safe **Vulnerable**
========================= ============== =============== ============== ============== ==============
2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
the unexpanded entity verbatim.
3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
+4. Since Python 3.8.0, external general entities are no longer processed by
+ default since Python.
billion laughs / exponential entity expansion
constructed data. If you need to parse untrusted or unauthenticated data see
:ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default
+ to increase security. Before, the parser created network connections
+ to fetch remote files or loaded local files from the file
+ system for DTD and entities. The feature can be enabled again with method
+ :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object
+ and argument :data:`~xml.sax.handler.feature_external_ges`.
The convenience functions are:
activating virtual environments under PowerShell Core 6.1.
(Contributed by Brett Cannon in :issue:`32718`.)
+xml
+---
+
+* As mitigation against DTD and external entity retrieval, the
+ :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
+
+
Optimizations
=============
* :class:`uuid.UUID` now uses ``__slots__``, therefore instances can no longer
be weak-referenced and attributes can no longer be added.
+* :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
CPython bytecode changes
------------------------
import xml.sax
from xml.sax.xmlreader import AttributesImpl
+from xml.sax.handler import feature_external_ges
from xml.dom import pulldom
from test.support import findfile
# This should have returned 'END_ELEMENT'.
self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)
+ def test_external_ges_default(self):
+ parser = pulldom.parseString(SMALL_SAMPLE)
+ saxparser = parser.parser
+ ges = saxparser.getFeature(feature_external_ges)
+ self.assertEqual(ges, False)
+
class ThoroughTestCase(unittest.TestCase):
"""Test the hard-to-reach parts of pulldom."""
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
import gc
import os.path
import shutil
+from urllib.error import URLError
from test import support
from test.support import findfile, run_unittest, TESTFN
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
+
+ class TestEntityRecorder:
+ def __init__(self):
+ self.entities = []
+
+ def resolveEntity(self, publicId, systemId):
+ self.entities.append((publicId, systemId))
+ source = InputSource()
+ source.setPublicId(publicId)
+ source.setSystemId(systemId)
+ return source
+
def test_expat_dtdhandler(self):
parser = create_parser()
handler = self.TestDTDHandler()
[("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
+ def test_expat_external_dtd_enabled(self):
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ with self.assertRaises(URLError):
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ self.assertEqual(
+ resolver.entities, [(None, 'unsupported://non-existing')]
+ )
+
+ def test_expat_external_dtd_default(self):
+ parser = create_parser()
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ parser.feed('<doc />')
+ parser.close()
+ self.assertEqual(resolver.entities, [])
+
# ===== EntityResolver support
class TestEntityResolver:
inpsrc.setByteStream(BytesIO(b"<entity/>"))
return inpsrc
- def test_expat_entityresolver(self):
+ def test_expat_entityresolver_enabled(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
self.assertEqual(result.getvalue(), start +
b"<doc><entity></entity></doc>")
+ def test_expat_entityresolver_default(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_external_ges), False)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('<!DOCTYPE doc [\n')
+ parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
+ parser.feed(']>\n')
+ parser.feed('<doc>&test;</doc>')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"<doc></doc>")
+
# ===== Attributes support
class AttrGatherer(ContentHandler):
<document>&entity;</document>
"""
+EXTERNAL_ENTITY_XML = """\
+<!DOCTYPE points [
+<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
+]>
+<document>&entity;</document>
+"""
def checkwarnings(*filters, quiet=False):
def decorator(test):
root = parser.close()
self.serialize_check(root, '<document>text</document>')
+ # 4) external (SYSTEM) entity
+
+ with self.assertRaises(ET.ParseError) as cm:
+ ET.XML(EXTERNAL_ENTITY_XML)
+ self.assertEqual(str(cm.exception),
+ 'undefined entity &entity;: line 4, column 10')
+
def test_namespace(self):
# Test namespace issues.
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = 0
self._interning = None
# XMLReader methods
--- /dev/null
+The xml.sax and xml.dom.minidom parsers no longer processes external
+entities by default. External DTD and ENTITY declarations no longer
+load files or create network connections.