From: Barry Warsaw Date: Fri, 29 Apr 2005 12:12:02 +0000 (+0000) Subject: get_filename(), get_content_charset(): It's possible that the charset named in X-Git-Tag: v2.3.6c1~16 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=712d474d3c32a7675431e518a5f2ebd4551d8903;p=thirdparty%2FPython%2Fcpython.git get_filename(), get_content_charset(): It's possible that the charset named in an RFC 2231-style header could be bogus or unknown to Python. In that case, we return the text part of the parameter undecoded. However, in get_content_charset(), if that is not ascii, then it is an illegal charset and so we return failobj. Test cases and a version bump are included. Committing this to the Python 2.3 branch because I need to generate an email 2.5.6 release that contains these patches. I will port these fixes to Python 2.4 and 2.5 for email 3.x. --- diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 5b76e850b8dd..13963301b85b 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -1,8 +1,7 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2005 Python Software Foundation +# Author: barry@python.org (Barry Warsaw) -"""Basic message object for the email package object model. -""" +"""Basic message object for the email package object model.""" import re import uu @@ -728,7 +727,13 @@ class Message: if isinstance(filename, TupleType): # It's an RFC 2231 encoded parameter newvalue = _unquotevalue(filename) - return unicode(newvalue[2], newvalue[0] or 'us-ascii') + try: + return unicode(newvalue[2], newvalue[0] or 'us-ascii') + # LookupError can get raised if the charset isn't known to Python. + # UnicodeError can get raised if the encoded text contains a + # character not in the charset. 
+ except (LookupError, UnicodeError): + return newvalue[2] else: newvalue = _unquotevalue(filename.strip()) return newvalue @@ -815,7 +820,18 @@ class Message: if isinstance(charset, TupleType): # RFC 2231 encoded, so decode it, and it better end up as ascii. pcharset = charset[0] or 'us-ascii' - charset = unicode(charset[2], pcharset).encode('us-ascii') + try: + charset = unicode(charset[2], pcharset).encode('us-ascii') + # LookupError can get raised if the charset isn't known to Python. + # UnicodeError can get raised if the encoded text contains a + # character not in the charset. + except (LookupError, UnicodeError): + charset = charset[2] + # charset characters should be in us-ascii range + try: + charset = unicode(charset, 'us-ascii').encode('us-ascii') + except UnicodeError: + return failobj # RFC 2046, $4.1.2 says charsets are not case sensitive return charset.lower() diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index a18c90e26152..bc829c25cbca 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -1,10 +1,10 @@ -# Copyright (C) 2001-2004 Python Software Foundation +# Copyright (C) 2001-2005 Python Software Foundation # Author: barry@python.org (Barry Warsaw) """A package for parsing, handling, and generating email messages. 
""" -__version__ = '2.5.5' +__version__ = '2.5.6' __all__ = [ 'base64MIME', diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index c69c25871ca9..ad16eab91bb2 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001,2002,2003 Python Software Foundation +# Copyright (C) 2001-2005 Python Software Foundation # email package unit tests import os @@ -2758,6 +2758,50 @@ Content-Type: text/plain; self.assertEqual(msg.get_content_charset(), 'this is even more ***fun*** is it not.pdf') + def test_rfc2231_bad_encoding_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="bogus'xx'This%20is%20even%20more%20"; +\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_bad_encoding_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="ascii'xx'This%20is%20even%20more%20"; +\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf%E2" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf\xe2') + def _testclasses():