-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2005 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
-"""Basic message object for the email package object model.
-"""
+"""Basic message object for the email package object model."""
import re
import uu
if isinstance(filename, TupleType):
# It's an RFC 2231 encoded parameter
newvalue = _unquotevalue(filename)
- return unicode(newvalue[2], newvalue[0] or 'us-ascii')
+ try:
+ return unicode(newvalue[2], newvalue[0] or 'us-ascii')
+ # LookupError is raised if the charset isn't known to Python.
+ # UnicodeError is raised if the encoded text contains a byte
+ # sequence that is not valid in the charset.  Either way, fall
+ # back to returning the raw, undecoded value.
+ except (LookupError, UnicodeError):
+ return newvalue[2]
else:
newvalue = _unquotevalue(filename.strip())
return newvalue
if isinstance(charset, TupleType):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
pcharset = charset[0] or 'us-ascii'
- charset = unicode(charset[2], pcharset).encode('us-ascii')
+ try:
+ charset = unicode(charset[2], pcharset).encode('us-ascii')
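+ # LookupError is raised if the charset isn't known to Python.
+ # UnicodeError is raised if the encoded text contains a byte
+ # sequence that is not valid in the charset, or if the decoded
+ # text cannot be encoded as us-ascii.  Either way, fall back to
+ # the raw, undecoded value.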
+ except (LookupError, UnicodeError):
+ charset = charset[2]
+ # The charset name itself must consist solely of us-ascii
+ # characters; if it does not, give up and return failobj.
+ try:
+ charset = unicode(charset, 'us-ascii').encode('us-ascii')
+ except UnicodeError:
+ return failobj
# RFC 2046, $4.1.2 says charsets are not case sensitive
return charset.lower()
-# Copyright (C) 2001-2004 Python Software Foundation
+# Copyright (C) 2001-2005 Python Software Foundation
# Author: barry@python.org (Barry Warsaw)
"""A package for parsing, handling, and generating email messages.
"""
-__version__ = '2.5.5'
+__version__ = '2.5.6'
__all__ = [
'base64MIME',
-# Copyright (C) 2001,2002,2003 Python Software Foundation
+# Copyright (C) 2001-2005 Python Software Foundation
# email package unit tests
import os
self.assertEqual(msg.get_content_charset(),
'this is even more ***fun*** is it not.pdf')
+ def test_rfc2231_bad_encoding_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+
+ def test_rfc2231_bad_encoding_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # The 'bogus' charset label is not known to Python, so the value
+ # cannot be decoded.  This should return None because non-ascii
+ # characters in the charset are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # The charset label (ascii) is known, but the value contains
+ # non-ascii octets.  This should return None because non-ascii
+ # characters in the charset are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf%E2"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf\xe2')
+
\f
def _testclasses():