Patches to address SF bugs 1409538 (Japanese codecs in CODEC_MAP) and 1409455 (.set_payload() gives bad .get_payload() result)

author Barry Warsaw <barry@python.org>

Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)

committer Barry Warsaw <barry@python.org>

Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)
author Barry Warsaw <barry@python.org>
Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)
committer Barry Warsaw <barry@python.org>
Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py

index dd328e050152ee352f360171972b4f1cef6d3f7b..fb4e5a9b185d84b229fa613f5f8ad9ffe38c089e 100644 (file)
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
@@ -1,5 +1,5 @@
-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw)
  
  from types import UnicodeType
  from email.Encoders import encode_7or8bit
@@ -99,20 +99,13 @@ ALIASES = {
  # of stability and useability.
  
  CODEC_MAP = {
-    'euc-jp':      'japanese.euc-jp',
-    'iso-2022-jp': 'japanese.iso-2022-jp',
-    'shift_jis':   'japanese.shift_jis',
-    'euc-kr':      'korean.euc-kr',
-    'ks_c_5601-1987': 'korean.cp949',
-    'iso-2022-kr': 'korean.iso-2022-kr',
-    'johab':       'korean.johab',
-    'gb2132':      'eucgb2312_cn',
-    'big5':        'big5_tw',
-    'utf-8':       'utf-8',
+    'gb2132':   'eucgb2312_cn',
+    'big5':     'big5_tw',
+    'utf-8':    'utf-8',
      # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
      # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
      # Let that stuff pass through without conversion to/from Unicode.
-    'us-ascii':    None,
+    'us-ascii': None,
      }
  
  
@@ -165,6 +158,26 @@ def add_codec(charset, codecname):
      CODEC_MAP[charset] = codecname
  
  
+def _find_asian_codec(charset, language):
+    try:
+        unicode('foo', charset)
+        return charset
+    except LookupError:
+        try:
+            codec = language + '.' + charset
+            unicode('foo', codec)
+            return codec
+        except LookupError:
+            return None
+
+
+for _charset in ('euc-jp', 'iso-2022-jp', 'shift_jis'):
+    add_codec(_charset, _find_asian_codec(_charset, 'japanese') or _charset)
+
+for _charset in ('euc-kr', 'cp949', 'iso-2022-kr', 'johab'):
+    add_codec(_charset, _find_asian_codec(_charset, 'korean') or _charset)
+
+
  \f
  class Charset:
      """Map character sets to their email properties.
@@ -229,7 +242,7 @@ class Charset:
          self.input_codec = CODEC_MAP.get(self.input_charset,
                                           self.input_charset)
          self.output_codec = CODEC_MAP.get(self.output_charset,
-                                            self.input_codec)
+                                          self.input_codec)
  
      def __str__(self):
          return self.input_charset.lower()
diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py

index 56d44ea521765ba8abfce58b33bb50bd65448430..bbc19cd2c6dd3172052bbc35d4f3676bc87b671a 100644 (file)
--- a/Lib/email/Generator.py
+++ b/Lib/email/Generator.py
@@ -1,8 +1,7 @@
-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
  
-"""Classes to generate plain text from a message object tree.
-"""
+"""Classes to generate plain text from a message object tree."""
  
  import re
  import sys
@@ -192,9 +191,6 @@ class Generator:
          payload = msg.get_payload()
          if payload is None:
              return
-        cset = msg.get_charset()
-        if cset is not None:
-            payload = cset.body_encode(payload)
          if not _isstring(payload):
              raise TypeError, 'string payload expected: %s' % type(payload)
          if self._mangle_from_:
diff --git a/Lib/email/Message.py b/Lib/email/Message.py

index 10c2921ea0a21cc1c05709c0e41bf132f98045f6..bb8718fe23b07c270cdda2718521873bffca8b4e 100644 (file)
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -272,11 +272,14 @@ class Message:
                              charset=charset.get_output_charset())
          else:
              self.set_param('charset', charset.get_output_charset())
+        if str(charset) <> charset.get_output_charset():
+            self._payload = charset.body_encode(self._payload)
          if not self.has_key('Content-Transfer-Encoding'):
              cte = charset.get_body_encoding()
              if callable(cte):
                  cte(self)
              else:
+                self._payload = charset.body_encode(self._payload)
                  self.add_header('Content-Transfer-Encoding', cte)
  
      def get_charset(self):
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index 1925889f67ed2c93e0295396078c87489a9e3bec..edb65e32a884bda13709587541bc9cb8bd6886b0 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2073,7 +2073,8 @@ class TestMiscellaneous(unittest.TestCase):
          charset = Charset(charsets[0])
          eq(charset.get_body_encoding(), 'base64')
          msg.set_payload('hello world', charset=charset)
-        eq(msg.get_payload(), 'hello world')
+        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
+        eq(msg.get_payload(decode=True), 'hello world')
          eq(msg['content-transfer-encoding'], 'base64')
          # Try another one
          msg = Message()
diff --git a/Lib/email/test/test_email_codecs.py b/Lib/email/test/test_email_codecs.py

index 99a3227c38c3300ac191ffafcaba0ce8af565fc9..afba94a036f710f264e3ebd38e93a953a94a7908 100644 (file)
--- a/Lib/email/test/test_email_codecs.py
+++ b/Lib/email/test/test_email_codecs.py
@@ -1,17 +1,16 @@
-# Copyright (C) 2002 Python Software Foundation
+# Copyright (C) 2002-2006 Python Software Foundation
  # email package unit tests for (optional) Asian codecs
  
  import unittest
  from test.test_support import TestSkipped, run_unittest
  
  from email.test.test_email import TestEmailBase
-from email.Charset import Charset
+from email.Charset import Charset, _find_asian_codec
  from email.Header import Header, decode_header
+from email.Message import Message
  
  # See if we have the Japanese codecs package installed
-try:
-    unicode('foo', 'japanese.iso-2022-jp')
-except LookupError:
+if not _find_asian_codec('iso-2022-jp', 'japanese'):
      raise TestSkipped, 'Optional Japanese codecs not installed'
  
  
@@ -49,6 +48,14 @@ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
          # TK: full decode comparison
          eq(h.__unicode__().encode('euc-jp'), long)
  
+    def test_payload_encoding(self):
+        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
+        jcode  = 'euc-jp'
+        msg = Message()
+        msg.set_payload(jhello, jcode)
+        ustr = unicode(msg.get_payload(), msg.get_content_charset())
+        self.assertEqual(jhello, ustr.encode(jcode))
+
  
  \f
  def suite():
author	Barry Warsaw <barry@python.org>
	Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)
committer	Barry Warsaw <barry@python.org>
	Wed, 8 Feb 2006 13:33:20 +0000 (13:33 +0000)
Lib/email/Charset.py		patch \| blob \| blame \| history
Lib/email/Generator.py		patch \| blob \| blame \| history
Lib/email/Message.py		patch \| blob \| blame \| history
Lib/email/test/test_email.py		patch \| blob \| blame \| history
Lib/email/test/test_email_codecs.py		patch \| blob \| blame \| history