maliciously constructed data. If you need to parse untrusted or
unauthenticated data see :ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.6.7
+
+ The SAX parser no longer processes general external entities by default to
+ increase security. To enable processing of external entities,
+ pass a custom parser instance in::
+
+ from xml.dom.pulldom import parse
+ from xml.sax import make_parser
+ from xml.sax.handler import feature_external_ges
+
+ parser = make_parser()
+ parser.setFeature(feature_external_ges, True)
+ parse(filename, parser=parser)
+
Example::
========================= ============== =============== ============== ============== ==============
billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3)
-`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe
+external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3)
+`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe
decompression bomb Safe Safe Safe Safe **Vulnerable**
========================= ============== =============== ============== ============== ==============
2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
the unexpanded entity verbatim.
3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
+4. Since Python 3.6.7, external general entities are no longer processed by
+ default.
billion laughs / exponential entity expansion
constructed data. If you need to parse untrusted or unauthenticated data see
:ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.6.7
+
+ The SAX parser no longer processes general external entities by default
+ to increase security. Before, the parser created network connections
+ to fetch remote files or loaded local files from the file
+ system for DTD and entities. The feature can be enabled again with method
+ :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object
+ and argument :data:`~xml.sax.handler.feature_external_ges`.
The convenience functions are:
environment. (Contributed by Brett Cannon in :issue:`25154`.)
+xml
+---
+
+* As mitigation against DTD and external entity retrieval, the
+ :mod:`xml.dom.minidom` and :mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
+
+
Deprecated functions and types of the C API
-------------------------------------------
* The functions in the :mod:`compileall` module now return booleans instead
of ``1`` or ``0`` to represent success or failure, respectively. Thanks to
- booleans being a subclass of integers, this should only be an issue if you
+ booleans being a subclass of integers, this should only be an issue if you
were doing identity checks for ``1`` or ``0``. See :issue:`25768`.
* Reading the :attr:`~urllib.parse.SplitResult.port` attribute of
The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
locale to the ``LC_NUMERIC`` locale in some cases.
(Contributed by Victor Stinner in :issue:`31900`.)
+
+
+Notable changes in Python 3.6.7
+===============================
+
+:mod:`xml.dom.minidom` and :mod:`xml.sax` modules no longer process
+external entities by default. See also :issue:`17239`.
import xml.sax
from xml.sax.xmlreader import AttributesImpl
+from xml.sax.handler import feature_external_ges
from xml.dom import pulldom
from test.support import findfile
self.fail(
"Ran out of events, but should have received END_DOCUMENT")
+ def test_external_ges_default(self):
+ parser = pulldom.parseString(SMALL_SAMPLE)
+ saxparser = parser.parser
+ ges = saxparser.getFeature(feature_external_ges)
+ self.assertEqual(ges, False)
+
class ThoroughTestCase(unittest.TestCase):
"""Test the hard-to-reach parts of pulldom."""
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
import gc
import os.path
import shutil
+from urllib.error import URLError
from test import support
from test.support import findfile, run_unittest, TESTFN
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
+
+ class TestEntityRecorder:
+ def __init__(self):
+ self.entities = []
+
+ def resolveEntity(self, publicId, systemId):
+ self.entities.append((publicId, systemId))
+ source = InputSource()
+ source.setPublicId(publicId)
+ source.setSystemId(systemId)
+ return source
+
def test_expat_dtdhandler(self):
parser = create_parser()
handler = self.TestDTDHandler()
[("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
+ def test_expat_external_dtd_enabled(self):
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ with self.assertRaises(URLError):
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ self.assertEqual(
+ resolver.entities, [(None, 'unsupported://non-existing')]
+ )
+
+ def test_expat_external_dtd_default(self):
+ parser = create_parser()
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ parser.feed('<doc />')
+ parser.close()
+ self.assertEqual(resolver.entities, [])
+
# ===== EntityResolver support
class TestEntityResolver:
inpsrc.setByteStream(BytesIO(b"<entity/>"))
return inpsrc
- def test_expat_entityresolver(self):
+ def test_expat_entityresolver_enabled(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
self.assertEqual(result.getvalue(), start +
b"<doc><entity></entity></doc>")
+ def test_expat_entityresolver_default(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_external_ges), False)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('<!DOCTYPE doc [\n')
+ parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
+ parser.feed(']>\n')
+ parser.feed('<doc>&test;</doc>')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"<doc></doc>")
+
# ===== Attributes support
class AttrGatherer(ContentHandler):
<document>&entity;</document>
"""
+EXTERNAL_ENTITY_XML = """\
+<!DOCTYPE points [
+<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
+]>
+<document>&entity;</document>
+"""
class ModuleTest(unittest.TestCase):
def test_sanity(self):
root = parser.close()
self.serialize_check(root, '<document>text</document>')
+ # 4) external (SYSTEM) entity
+
+ with self.assertRaises(ET.ParseError) as cm:
+ ET.XML(EXTERNAL_ENTITY_XML)
+ self.assertEqual(str(cm.exception),
+ 'undefined entity &entity;: line 4, column 10')
+
def test_namespace(self):
# Test namespace issues.
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = 0
self._interning = None
# XMLReader methods
--- /dev/null
+The xml.sax and xml.dom.minidom parsers no longer process external
+entities by default. External DTD and ENTITY declarations no longer
+load files or create network connections.