git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString().
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 2 Apr 2015 20:05:23 +0000 (23:05 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Thu, 2 Apr 2015 20:05:23 +0000 (23:05 +0300)
Lib/test/test_sax.py

index e314b7c77125e07f7305c04a23e119750760ee3e..5019722ed10fab9c7075ddaad5262417d7070de5 100644 (file)
@@ -15,10 +15,11 @@ from xml.sax.handler import feature_namespaces
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from cStringIO import StringIO
 import io
+import gc
 import os.path
 import shutil
 import test.test_support as support
-from test.test_support import findfile, run_unittest
+from test.test_support import findfile, run_unittest, TESTFN
 import unittest
 
 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
@@ -90,6 +91,111 @@ class XmlTestBase(unittest.TestCase):
         self.assertEqual(attrs["attr"], "val")
         self.assertEqual(attrs.getQNameByName("attr"), "attr")
 
+
+def xml_unicode(doc, encoding=None):  # doc, optionally prefixed with an XML declaration
+    if encoding is None:  # None: no declaration, doc is returned unchanged
+        return doc
+    return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)  # declaration names `encoding`
+
+def xml_bytes(doc, encoding, decl_encoding=Ellipsis):  # doc serialized in `encoding`
+    if decl_encoding is Ellipsis:  # Ellipsis = "not given"; None is passed on and means "no declaration"
+        decl_encoding = encoding
+    return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')  # unencodable chars -> char refs
+
+def make_xml_file(doc, encoding, decl_encoding=Ellipsis):  # file counterpart of xml_bytes(); writes to TESTFN
+    if decl_encoding is Ellipsis:  # Ellipsis = "not given": declare the encoding actually used
+        decl_encoding = encoding
+    with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:  # same error handler as xml_bytes()
+        f.write(xml_unicode(doc, decl_encoding))
+
+
+class ParseTest(unittest.TestCase):  # exercises xml.sax.parse() and xml.sax.parseString()
+    data = support.u(r'<money value="$\xa3\u20ac\U0001017b">'
+                     r'$\xa3\u20ac\U0001017b</money>')  # presumably support.u() decodes the escapes -- confirm
+
+    def tearDown(self):  # remove the TESTFN file that make_xml_file() may have written
+        support.unlink(TESTFN)
+
+    def check_parse(self, f):  # f: source passed to parse(); callers use a path, a byte stream or an InputSource
+        from xml.sax import parse
+        result = StringIO()
+        parse(f, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))  # output must equal the UTF-8 form of data
+
+    def test_parse_bytes(self):  # each case is tried as BytesIO, as file name and as open binary file
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding)))  # with XML declaration
+            make_xml_file(self.data, encoding)
+            self.check_parse(TESTFN)
+            with io.open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+            self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None)))  # without XML declaration
+            make_xml_file(self.data, encoding, None)
+            self.check_parse(TESTFN)
+            with io.open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+        # accept UTF-8 with BOM
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))  # declaration says plain utf-8
+        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))  # no declaration at all
+        make_xml_file(self.data, 'utf-8-sig', None)
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # accept data with declared encoding
+        self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1')))
+        make_xml_file(self.data, 'iso-8859-1')
+        self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # fail on non-UTF-8 incompatible data without declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with self.assertRaises(SAXException):
+            self.check_parse(TESTFN)
+        with io.open(TESTFN, 'rb') as f:
+            with self.assertRaises(SAXException):
+                self.check_parse(f)
+
+    def test_parse_InputSource(self):  # encoding comes from the InputSource, not from the document
+        # accept data without declared but with explicitly specified encoding
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with io.open(TESTFN, 'rb') as f:
+            input = InputSource()  # NOTE(review): local name shadows the builtin input()
+            input.setByteStream(f)
+            input.setEncoding('iso-8859-1')
+            self.check_parse(input)
+
+    def check_parseString(self, s):  # s: serialized document (from xml_bytes()) passed to parseString()
+        from xml.sax import parseString
+        result = StringIO()
+        parseString(s, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8'))  # output must equal the UTF-8 form of data
+
+    def test_parseString_bytes(self):  # same encoding cases as test_parse_bytes, in-memory only
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parseString(xml_bytes(self.data, encoding))  # with XML declaration
+            self.check_parseString(xml_bytes(self.data, encoding, None))  # without XML declaration
+        # accept UTF-8 with BOM
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
+        # accept data with declared encoding
+        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
+        # fail on non-UTF-8 incompatible data without declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
+
+
 class MakeParserTest(unittest.TestCase):
     def test_make_parser2(self):
         # Creating parsers several times in a row should succeed.
@@ -949,6 +1055,7 @@ class XmlReaderTest(XmlTestBase):
 
 def test_main():
     run_unittest(MakeParserTest,
+                 ParseTest,
                  SaxutilsTest,
                  PrepareInputSourceTest,
                  StringXmlgenTest,