From: Serhiy Storchaka Date: Sun, 12 May 2013 14:29:34 +0000 (+0300) Subject: Issue #17606: Fixed support of encoded byte strings in the XMLGenerator X-Git-Tag: v2.7.6rc1~392 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7423903eee3f01e5781fa6840ee08ccf5f26e800;p=thirdparty%2FPython%2Fcpython.git Issue #17606: Fixed support of encoded byte strings in the XMLGenerator characters() and ignorableWhitespace() methods. Original patch by Sebastian Ortiz Vasquez. --- diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 3a20eac6b128..86638a2665d9 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -284,6 +284,26 @@ class XmlgenTest: self.assertEqual(result.getvalue(), start + "<doc> </doc>") + def test_xmlgen_encoding_bytes(self): + encodings = ('iso-8859-15', 'utf-8', + 'utf-16be', 'utf-16le', + 'utf-32be', 'utf-32le') + for encoding in encodings: + result = self.ioclass() + gen = XMLGenerator(result, encoding=encoding) + + gen.startDocument() + gen.startElement("doc", {"a": u'\u20ac'}) + gen.characters(u"\u20ac".encode(encoding)) + gen.ignorableWhitespace(" ".encode(encoding)) + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), ( + u'<?xml version="1.0" encoding="%s"?>\n' + u'<doc a="\u20ac">\u20ac </doc>' % encoding + ).encode(encoding, 'xmlcharrefreplace')) + def test_xmlgen_ns(self): result = self.ioclass() gen = XMLGenerator(result) diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py index 3d81a8e1bf47..1abcd9a0c4aa 100644 --- a/Lib/xml/sax/saxutils.py +++ b/Lib/xml/sax/saxutils.py @@ -180,10 +180,14 @@ class XMLGenerator(handler.ContentHandler): self._write(u'</%s>' % self._qname(name)) def characters(self, content): - self._write(escape(unicode(content))) + if not isinstance(content, unicode): + content = unicode(content, self._encoding) + self._write(escape(content)) def ignorableWhitespace(self, content): - self._write(unicode(content)) + if not isinstance(content, unicode): + content = unicode(content, self._encoding) + self._write(content) def 
processingInstruction(self, target, data): self._write(u'<?%s %s?>' % (target, data)) diff --git a/Misc/ACKS b/Misc/ACKS index 7f78dbdeb7b6..37ed4ce01e0b 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1045,6 +1045,7 @@ Case Van Horsen Kyle VanderBeek Atul Varma Dmitry Vasiliev +Sebastian Ortiz Vasquez Alexandre Vassalotti Frank Vercruesse Mike Verdone diff --git a/Misc/NEWS b/Misc/NEWS index 160234688095..6cdf42b171bd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -38,6 +38,10 @@ Core and Builtins Library ------- +- Issue #17606: Fixed support of encoded byte strings in the XMLGenerator + .characters() and ignorableWhitespace() methods. Original patch by Sebastian + Ortiz Vasquez. + - Issue #16601: Restarting iteration over tarfile no more continues from where it left off. Patch by Michael Birtwell.