Issue #17606: Fixed support of encoded byte strings in the XMLGenerator

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py

index 3a20eac6b12824b34e3541113c245226636b704b..86638a2665d98d5696680f2fff074dbb249c6c92 100644 (file)
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -284,6 +284,26 @@ class XmlgenTest:
  
          self.assertEqual(result.getvalue(), start + "<doc> </doc>")
  
+    def test_xmlgen_encoding_bytes(self):
+        encodings = ('iso-8859-15', 'utf-8',
+                     'utf-16be', 'utf-16le',
+                     'utf-32be', 'utf-32le')
+        for encoding in encodings:
+            result = self.ioclass()
+            gen = XMLGenerator(result, encoding=encoding)
+
+            gen.startDocument()
+            gen.startElement("doc", {"a": u'\u20ac'})
+            gen.characters(u"\u20ac".encode(encoding))
+            gen.ignorableWhitespace(" ".encode(encoding))
+            gen.endElement("doc")
+            gen.endDocument()
+
+            self.assertEqual(result.getvalue(), (
+                u'<?xml version="1.0" encoding="%s"?>\n'
+                u'<doc a="\u20ac">\u20ac </doc>' % encoding
+                ).encode(encoding, 'xmlcharrefreplace'))
+
      def test_xmlgen_ns(self):
          result = self.ioclass()
          gen = XMLGenerator(result)
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py

index 3d81a8e1bf47ef55b38225039f7d89344dc3dc9f..1abcd9a0c4aa8bf03359d0f00ea0b84489d235b2 100644 (file)
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -180,10 +180,14 @@ class XMLGenerator(handler.ContentHandler):
          self._write(u'</%s>' % self._qname(name))
  
      def characters(self, content):
-        self._write(escape(unicode(content)))
+        if not isinstance(content, unicode):
+            content = unicode(content, self._encoding)
+        self._write(escape(content))
  
      def ignorableWhitespace(self, content):
-        self._write(unicode(content))
+        if not isinstance(content, unicode):
+            content = unicode(content, self._encoding)
+        self._write(content)
  
      def processingInstruction(self, target, data):
          self._write(u'<?%s %s?>' % (target, data))
diff --git a/Misc/ACKS b/Misc/ACKS

index 7f78dbdeb7b647b65f3a852a46fae8991f9f0b78..37ed4ce01e0b0eef579606f1e8da675d521010f9 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1045,6 +1045,7 @@ Case Van Horsen
  Kyle VanderBeek
  Atul Varma
  Dmitry Vasiliev
+Sebastian Ortiz Vasquez
  Alexandre Vassalotti
  Frank Vercruesse
  Mike Verdone
diff --git a/Misc/NEWS b/Misc/NEWS

index 160234688095dbb9895e424854bcfd40417a4738..6cdf42b171bd59c09795186d46a24c7563513ab8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,10 @@ Core and Builtins
  Library
  -------
  
+- Issue #17606: Fixed support of encoded byte strings in the
+  XMLGenerator.characters() and ignorableWhitespace() methods.  Original patch
+  by Sebastian Ortiz Vasquez.
+
  - Issue #16601: Restarting iteration over tarfile no more continues from where
    it left off.  Patch by Michael Birtwell.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 12 May 2013 14:29:34 +0000 (17:29 +0300)
Lib/test/test_sax.py		patch \| blob \| blame \| history
Lib/xml/sax/saxutils.py		patch \| blob \| blame \| history
Misc/ACKS		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history