when used in email headers or bodies. Certain character sets must be
converted outright, and are not allowed in email.
-Optional \var{input_charset} is as described below. After being alias
-normalized it is also used as a lookup into the registry of character
-sets to find out the header encoding, body encoding, and output
-conversion codec to be used for the character set. For example, if
+Optional \var{input_charset} is as described below; it is always
+coerced to lower case. After being alias normalized it is also used
+as a lookup into the registry of character sets to find out the header
+encoding, body encoding, and output conversion codec to be used for
+the character set. For example, if
\var{input_charset} is \code{iso-8859-1}, then headers and bodies will
be encoded using quoted-printable and no output conversion codec is
necessary. If \var{input_charset} is \code{euc-jp}, then headers will
\begin{methoddesc}[Message]{get_content_charset}{\optional{failobj}}
Return the \code{charset} parameter of the \mailheader{Content-Type}
-header. If there is no \mailheader{Content-Type} header, or if that
-header has no \code{charset} parameter, \var{failobj} is returned.
+header, coerced to lower case. If there is no
+\mailheader{Content-Type} header, or if that header has no
+\code{charset} parameter, \var{failobj} is returned.
Note that this method differs from \method{get_charset()} which
returns the \class{Charset} instance for the default encoding of the
this attribute will have the same value as the input_codec.
"""
def __init__(self, input_charset=DEFAULT_CHARSET):
+ # RFC 2046, section 4.1.2 says charsets are not case sensitive
+ input_charset = input_charset.lower()
# Set the input charset after filtering through the aliases
self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the
# charset_map dictionary. Try that first, but let the user override
# it.
henc, benc, conv = CHARSETS.get(self.input_charset,
- (SHORTEST, SHORTEST, None))
+ (SHORTEST, BASE64, None))
# Set the attributes, allowing the arguments to override the default.
self.header_encoding = henc
self.body_encoding = benc
def get_content_charset(self, failobj=None):
"""Return the charset parameter of the Content-Type header.
- If there is no Content-Type header, or if that header has no charset
- parameter, failobj is returned.
+ The returned string is always coerced to lower case. If there is no
+ Content-Type header, or if that header has no charset parameter,
+ failobj is returned.
"""
missing = []
charset = self.get_param('charset', missing)
return failobj
if isinstance(charset, TupleType):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
- return unicode(charset[2], charset[0]).encode('us-ascii')
- return charset
+ charset = unicode(charset[2], charset[0]).encode('us-ascii')
+ # RFC 2046, section 4.1.2 says charsets are not case sensitive
+ return charset.lower()
def get_charsets(self, failobj=None):
"""Return a list containing the charset(s) used in this message.
"""A package for parsing, handling, and generating email messages.
"""
-__version__ = '2.4.1'
+__version__ = '2.4.2'
__all__ = [
'base64MIME',
filename='foo\\wacky"name')
eq(msg.get_filename(), 'foo\\wacky"name')
+ def test_get_body_encoding_with_bogus_charset(self):
+ charset = Charset('not a charset')
+ self.assertEqual(charset.get_body_encoding(), 'base64')
+
+ def test_get_body_encoding_with_uppercase_charset(self):
+ eq = self.assertEqual
+ msg = Message()
+ msg['Content-Type'] = 'text/plain; charset=UTF-8'
+ eq(msg['content-type'], 'text/plain; charset=UTF-8')
+ charsets = msg.get_charsets()
+ eq(len(charsets), 1)
+ eq(charsets[0], 'utf-8')
+ charset = Charset(charsets[0])
+ eq(charset.get_body_encoding(), 'base64')
+ msg.set_payload('hello world', charset=charset)
+ eq(msg.get_payload(), 'hello world')
+ eq(msg['content-transfer-encoding'], 'base64')
+ # Try another one
+ msg = Message()
+ msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
+ charsets = msg.get_charsets()
+ eq(len(charsets), 1)
+ eq(charsets[0], 'us-ascii')
+ charset = Charset(charsets[0])
+ eq(charset.get_body_encoding(), Encoders.encode_7or8bit)
+ msg.set_payload('hello world', charset=charset)
+ eq(msg.get_payload(), 'hello world')
+ eq(msg['content-transfer-encoding'], '7bit')
+
+ def test_charsets_case_insensitive(self):
+ lc = Charset('us-ascii')
+ uc = Charset('US-ASCII')
+ self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
+
\f
# Test the iterator/generators