]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.6] bpo-17239: Disable external entities in SAX parser (GH-9217) (GH-9512)
authorChristian Heimes <christian@python.org>
Mon, 24 Sep 2018 12:38:31 +0000 (14:38 +0200)
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 24 Sep 2018 12:38:31 +0000 (05:38 -0700)
The SAX parser no longer processes general external entities by default
to increase security. Before, the parser created network connections
to fetch remote files or loaded local files from the file system for DTD
and entities.

Signed-off-by: Christian Heimes <christian@python.org>
https://bugs.python.org/issue17239.
(cherry picked from commit 17b1d5d4e36aa57a9b25a0e694affbd1ee637e45)

Co-authored-by: Christian Heimes <christian@python.org>
https://bugs.python.org/issue17239

Doc/library/xml.dom.pulldom.rst
Doc/library/xml.rst
Doc/library/xml.sax.rst
Doc/whatsnew/3.6.rst
Lib/test/test_pulldom.py
Lib/test/test_sax.py
Lib/test/test_xml_etree.py
Lib/xml/sax/expatreader.py
Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst [new file with mode: 0644]

index 5c0f469ad7a5cf5e32508480100ef91330cf9134..2504409e7f87cce67a44d3ae46af2471bc896e8d 100644 (file)
@@ -25,6 +25,20 @@ events until either processing is finished or an error condition occurs.
    maliciously constructed data.  If you need to parse untrusted or
    unauthenticated data see :ref:`xml-vulnerabilities`.
 
+.. versionchanged:: 3.6.7
+
+   The SAX parser no longer processes general external entities by default to
+   increase security by default. To enable processing of external entities,
+   pass a custom parser instance in::
+
+      from xml.dom.pulldom import parse
+      from xml.sax import make_parser
+      from xml.sax.handler import feature_external_ges
+
+      parser = make_parser()
+      parser.setFeature(feature_external_ges, True)
+      parse(filename, parser=parser)
+
 
 Example::
 
index 63c24f80ac87c41d9524b3a17725ac506816b26c..9b8ba6b17c85314b7e4974aa62250cdf393fc35a 100644 (file)
@@ -65,8 +65,8 @@ kind                       sax              etree             minidom          p
 =========================  ==============   ===============   ==============   ==============   ==============
 billion laughs             **Vulnerable**   **Vulnerable**    **Vulnerable**   **Vulnerable**   **Vulnerable**
 quadratic blowup           **Vulnerable**   **Vulnerable**    **Vulnerable**   **Vulnerable**   **Vulnerable**
-external entity expansion  **Vulnerable**   Safe    (1)       Safe    (2)      **Vulnerable**   Safe    (3)
-`DTD`_ retrieval           **Vulnerable**   Safe              Safe             **Vulnerable**   Safe
+external entity expansion  Safe (4)         Safe    (1)       Safe    (2)      Safe (4)         Safe    (3)
+`DTD`_ retrieval           Safe (4)         Safe              Safe             Safe (4)         Safe
 decompression bomb         Safe             Safe              Safe             Safe             **Vulnerable**
 =========================  ==============   ===============   ==============   ==============   ==============
 
@@ -75,6 +75,8 @@ decompression bomb         Safe             Safe              Safe             S
 2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
    the unexpanded entity verbatim.
 3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
+4. Since Python 3.8.0, external general entities are no longer processed by
+   default since Python.
 
 
 billion laughs / exponential entity expansion
index 78d6633e098ba55cd1262ec9c9db704f7dfbb943..1a8f183a945fa4409a7640654267d1ebf2a31927 100644 (file)
@@ -24,6 +24,14 @@ the SAX API.
    constructed data.  If you need to parse untrusted or unauthenticated data see
    :ref:`xml-vulnerabilities`.
 
+.. versionchanged:: 3.6.7
+
+   The SAX parser no longer processes general external entities by default
+   to increase security. Before, the parser created network connections
+   to fetch remote files or loaded local files from the file
+   system for DTD and entities. The feature can be enabled again with method
+   :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object
+   and argument :data:`~xml.sax.handler.feature_external_ges`.
 
 The convenience functions are:
 
index d375cff1af9431f1c8a94d00e4576c7d3a4fc468..08d2d24294d0a8998a572a6211092beb6f435e86 100644 (file)
@@ -2055,6 +2055,15 @@ connected to and thus what Python interpreter will be used by the virtual
 environment.  (Contributed by Brett Cannon in :issue:`25154`.)
 
 
+xml
+---
+
+* As mitigation against DTD and external entity retrieval, the
+  :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+  external entities by default.
+  (Contributed by Christian Heimes in :issue:`17239`.)
+
+
 Deprecated functions and types of the C API
 -------------------------------------------
 
@@ -2163,7 +2172,7 @@ Changes in the Python API
 
 * The functions in the :mod:`compileall` module now return booleans instead
   of ``1`` or ``0`` to represent success or failure, respectively. Thanks to
-  booleans being a subclass of integers, this should only be an issue if you
+  booleans being a subclass of integers, this should only be an issue if you7
   were doing identity checks for ``1`` or ``0``. See :issue:`25768`.
 
 * Reading the :attr:`~urllib.parse.SplitResult.port` attribute of
@@ -2408,3 +2417,10 @@ Notable changes in Python 3.6.5
 The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
 locale to the ``LC_NUMERIC`` locale in some cases.
 (Contributed by Victor Stinner in :issue:`31900`.)
+
+
+Notable changes in Python 3.6.7
+===============================
+
+:mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+external entities by default. See also :issue:`17239`.
index 3d89e3adda26ce646d3a05eced17aca727e6e779..6dc51e4371d0f630300fa9487f88015e9e28793b 100644 (file)
@@ -3,6 +3,7 @@ import unittest
 import xml.sax
 
 from xml.sax.xmlreader import AttributesImpl
+from xml.sax.handler import feature_external_ges
 from xml.dom import pulldom
 
 from test.support import findfile
@@ -159,6 +160,12 @@ class PullDOMTestCase(unittest.TestCase):
             self.fail(
                 "Ran out of events, but should have received END_DOCUMENT")
 
+    def test_external_ges_default(self):
+        parser = pulldom.parseString(SMALL_SAMPLE)
+        saxparser = parser.parser
+        ges = saxparser.getFeature(feature_external_ges)
+        self.assertEqual(ges, False)
+
 
 class ThoroughTestCase(unittest.TestCase):
     """Test the hard-to-reach parts of pulldom."""
index 2eb62905ffa882ba04c181af54a51424e3132d4e..3044960a0ed165466a7a425161ba49e5e50ed98c 100644 (file)
@@ -13,13 +13,14 @@ except SAXReaderNotAvailable:
 from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
                              XMLFilterBase, prepare_input_source
 from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import feature_namespaces, feature_external_ges
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from io import BytesIO, StringIO
 import codecs
 import gc
 import os.path
 import shutil
+from urllib.error import URLError
 from test import support
 from test.support import findfile, run_unittest, TESTFN
 
@@ -911,6 +912,18 @@ class ExpatReaderTest(XmlTestBase):
         def unparsedEntityDecl(self, name, publicId, systemId, ndata):
             self._entities.append((name, publicId, systemId, ndata))
 
+
+    class TestEntityRecorder:
+        def __init__(self):
+            self.entities = []
+
+        def resolveEntity(self, publicId, systemId):
+            self.entities.append((publicId, systemId))
+            source = InputSource()
+            source.setPublicId(publicId)
+            source.setSystemId(systemId)
+            return source
+
     def test_expat_dtdhandler(self):
         parser = create_parser()
         handler = self.TestDTDHandler()
@@ -927,6 +940,32 @@ class ExpatReaderTest(XmlTestBase):
             [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
         self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
 
+    def test_expat_external_dtd_enabled(self):
+        parser = create_parser()
+        parser.setFeature(feature_external_ges, True)
+        resolver = self.TestEntityRecorder()
+        parser.setEntityResolver(resolver)
+
+        with self.assertRaises(URLError):
+            parser.feed(
+                '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+            )
+        self.assertEqual(
+            resolver.entities, [(None, 'unsupported://non-existing')]
+        )
+
+    def test_expat_external_dtd_default(self):
+        parser = create_parser()
+        resolver = self.TestEntityRecorder()
+        parser.setEntityResolver(resolver)
+
+        parser.feed(
+            '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+        )
+        parser.feed('<doc />')
+        parser.close()
+        self.assertEqual(resolver.entities, [])
+
     # ===== EntityResolver support
 
     class TestEntityResolver:
@@ -936,8 +975,9 @@ class ExpatReaderTest(XmlTestBase):
             inpsrc.setByteStream(BytesIO(b"<entity/>"))
             return inpsrc
 
-    def test_expat_entityresolver(self):
+    def test_expat_entityresolver_enabled(self):
         parser = create_parser()
+        parser.setFeature(feature_external_ges, True)
         parser.setEntityResolver(self.TestEntityResolver())
         result = BytesIO()
         parser.setContentHandler(XMLGenerator(result))
@@ -951,6 +991,22 @@ class ExpatReaderTest(XmlTestBase):
         self.assertEqual(result.getvalue(), start +
                          b"<doc><entity></entity></doc>")
 
+    def test_expat_entityresolver_default(self):
+        parser = create_parser()
+        self.assertEqual(parser.getFeature(feature_external_ges), False)
+        parser.setEntityResolver(self.TestEntityResolver())
+        result = BytesIO()
+        parser.setContentHandler(XMLGenerator(result))
+
+        parser.feed('<!DOCTYPE doc [\n')
+        parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
+        parser.feed(']>\n')
+        parser.feed('<doc>&test;</doc>')
+        parser.close()
+
+        self.assertEqual(result.getvalue(), start +
+                         b"<doc></doc>")
+
     # ===== Attributes support
 
     class AttrGatherer(ContentHandler):
index c14f8397ae7da522a2a9c58b8b9dc9398fedda1c..2b8c5e59865652cbc0bef1f0a29b2ffacaa8a349 100644 (file)
@@ -90,6 +90,12 @@ ENTITY_XML = """\
 <document>&entity;</document>
 """
 
+EXTERNAL_ENTITY_XML = """\
+<!DOCTYPE points [
+<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
+]>
+<document>&entity;</document>
+"""
 
 class ModuleTest(unittest.TestCase):
     def test_sanity(self):
@@ -846,6 +852,13 @@ class ElementTreeTest(unittest.TestCase):
         root = parser.close()
         self.serialize_check(root, '<document>text</document>')
 
+        # 4) external (SYSTEM) entity
+
+        with self.assertRaises(ET.ParseError) as cm:
+            ET.XML(EXTERNAL_ENTITY_XML)
+        self.assertEqual(str(cm.exception),
+                'undefined entity &entity;: line 4, column 10')
+
     def test_namespace(self):
         # Test namespace issues.
 
index 421358fa5bc7f02023dd33c5ce1147f7b3a5e114..5066ffc2fa51f029eb7147cd7becbd4e835f823d 100644 (file)
@@ -95,7 +95,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
         self._lex_handler_prop = None
         self._parsing = 0
         self._entity_stack = []
-        self._external_ges = 1
+        self._external_ges = 0
         self._interning = None
 
     # XMLReader methods
diff --git a/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst
new file mode 100644 (file)
index 0000000..8dd0fe8
--- /dev/null
@@ -0,0 +1,3 @@
+The xml.sax and xml.dom.minidom parsers no longer processes external
+entities by default. External DTD and ENTITY declarations no longer
+load files or create network connections.