-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw)
from types import UnicodeType
from email.Encoders import encode_7or8bit
# of stability and useability.
CODEC_MAP = {
- 'euc-jp': 'japanese.euc-jp',
- 'iso-2022-jp': 'japanese.iso-2022-jp',
- 'shift_jis': 'japanese.shift_jis',
- 'euc-kr': 'korean.euc-kr',
- 'ks_c_5601-1987': 'korean.cp949',
- 'iso-2022-kr': 'korean.iso-2022-kr',
- 'johab': 'korean.johab',
- 'gb2132': 'eucgb2312_cn',
- 'big5': 'big5_tw',
- 'utf-8': 'utf-8',
+ # The charset name is 'gb2312'; the previous key 'gb2132' was a
+ # transposition, so a lookup for the real charset never hit this entry.
+ 'gb2312': 'eucgb2312_cn',
+ 'big5': 'big5_tw',
+ 'utf-8': 'utf-8',
# Hack: We don't want *any* conversion for stuff marked us-ascii, as all
# sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
# Let that stuff pass through without conversion to/from Unicode.
- 'us-ascii': None,
+ 'us-ascii': None,
}
CODEC_MAP[charset] = codecname
+def _find_asian_codec(charset, language):
+    """Return a codec name that can decode `charset`, or None.
+
+    The bare `charset` name is probed first.  If no codec answers to it,
+    the qualified name `language + '.' + charset` is probed instead.  None
+    is returned when neither name can be looked up.
+    """
+    # Decoding a throwaway ASCII string is used purely as a probe: an
+    # unknown codec name makes unicode() raise LookupError.
+    try:
+        unicode('foo', charset)
+    except LookupError:
+        pass
+    else:
+        return charset
+    # Only build the qualified name once the bare name has failed.
+    qualified = language + '.' + charset
+    try:
+        unicode('foo', qualified)
+    except LookupError:
+        return None
+    return qualified
+
+
+# For each Japanese charset, pass add_codec() (defined outside this hunk)
+# whichever codec name _find_asian_codec() resolves; when it returns None
+# the bare charset name itself is passed instead.
+for _charset in ('euc-jp', 'iso-2022-jp', 'shift_jis'):
+ add_codec(_charset, _find_asian_codec(_charset, 'japanese') or _charset)
+
+# Same procedure for the Korean charsets, probing the 'korean.' prefix.
+for _charset in ('euc-kr', 'cp949', 'iso-2022-kr', 'johab'):
+ add_codec(_charset, _find_asian_codec(_charset, 'korean') or _charset)
+
+
\f
class Charset:
"""Map character sets to their email properties.
self.input_codec = CODEC_MAP.get(self.input_charset,
self.input_charset)
self.output_codec = CODEC_MAP.get(self.output_charset,
- self.input_codec)
+ self.input_codec)
def __str__(self):
return self.input_charset.lower()
-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
-"""Classes to generate plain text from a message object tree.
-"""
+"""Classes to generate plain text from a message object tree."""
import re
import sys
payload = msg.get_payload()
if payload is None:
return
- cset = msg.get_charset()
- if cset is not None:
- payload = cset.body_encode(payload)
if not _isstring(payload):
raise TypeError, 'string payload expected: %s' % type(payload)
if self._mangle_from_:
charset=charset.get_output_charset())
else:
self.set_param('charset', charset.get_output_charset())
+ if str(charset) <> charset.get_output_charset():
+ self._payload = charset.body_encode(self._payload)
if not self.has_key('Content-Transfer-Encoding'):
cte = charset.get_body_encoding()
if callable(cte):
cte(self)
else:
+ self._payload = charset.body_encode(self._payload)
self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self):
charset = Charset(charsets[0])
eq(charset.get_body_encoding(), 'base64')
msg.set_payload('hello world', charset=charset)
- eq(msg.get_payload(), 'hello world')
+ eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
+ eq(msg.get_payload(decode=True), 'hello world')
eq(msg['content-transfer-encoding'], 'base64')
# Try another one
msg = Message()
-# Copyright (C) 2002 Python Software Foundation
+# Copyright (C) 2002-2006 Python Software Foundation
# email package unit tests for (optional) Asian codecs
import unittest
from test.test_support import TestSkipped, run_unittest
from email.test.test_email import TestEmailBase
-from email.Charset import Charset
+from email.Charset import Charset, _find_asian_codec
from email.Header import Header, decode_header
+from email.Message import Message
# See if we have the Japanese codecs package installed
-try:
- unicode('foo', 'japanese.iso-2022-jp')
-except LookupError:
+if not _find_asian_codec('iso-2022-jp', 'japanese'):
raise TestSkipped, 'Optional Japanese codecs not installed'
# TK: full decode comparison
eq(h.__unicode__().encode('euc-jp'), long)
+ def test_payload_encoding(self):
+     # A payload set with the euc-jp charset must decode (using the
+     # charset the message reports for itself) back to the same text.
+     charset_name = 'euc-jp'
+     original = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
+     msg = Message()
+     msg.set_payload(original, charset_name)
+     # Decode with the message's own content charset, then re-encode to
+     # euc-jp so the bytes can be compared with the input.
+     decoded = unicode(msg.get_payload(), msg.get_content_charset())
+     roundtripped = decoded.encode(charset_name)
+     self.assertEqual(original, roundtripped)
+
\f
def suite():