From: Barry Warsaw <barry@python.org>
Date: Tue, 19 Aug 2003 04:56:46 +0000 (+0000)
Subject: Backporting email 2.5.4 fixes from the trunk.
X-Git-Tag: v2.3.1~138
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=89af2f65dc80c4dfcfa77fcdd50a12e3daa6a4fc;p=thirdparty%2FPython%2Fcpython.git

Backporting email 2.5.4 fixes from the trunk.
---

diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 6dfa84b44680..6bba6aeb9811 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -571,13 +571,16 @@ class Message:
         Parameter keys are always compared case insensitively.  The return
         value can either be a string, or a 3-tuple if the parameter was RFC
         2231 encoded.  When it's a 3-tuple, the elements of the value are of
-        the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
-        string.  Your application should be prepared to deal with these, and
-        can convert the parameter to a Unicode string like so:
+        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
+        LANGUAGE can be None, in which case you should consider VALUE to be
+        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
+
+        Your application should be prepared to deal with 3-tuple return
+        values, and can convert the parameter to a Unicode string like so:
 
             param = msg.get_param('foo')
             if isinstance(param, tuple):
-                param = unicode(param[2], param[0])
+                param = unicode(param[2], param[0] or 'us-ascii')
 
         In any case, the parameter value (either the returned string, or the
         VALUE item in the 3-tuple) is always unquoted, unless unquote is set
@@ -708,7 +711,7 @@ class Message:
         if isinstance(filename, TupleType):
             # It's an RFC 2231 encoded parameter
             newvalue = _unquotevalue(filename)
-            return unicode(newvalue[2], newvalue[0])
+            return unicode(newvalue[2], newvalue[0] or 'us-ascii')
         else:
             newvalue = _unquotevalue(filename.strip())
             return newvalue
@@ -725,7 +728,8 @@ class Message:
             return failobj
         if isinstance(boundary, TupleType):
             # RFC 2231 encoded, so decode.  It better end up as ascii
-            return unicode(boundary[2], boundary[0]).encode('us-ascii')
+            charset = boundary[0] or 'us-ascii'
+            return unicode(boundary[2], charset).encode('us-ascii')
         return _unquotevalue(boundary.strip())
 
     def set_boundary(self, boundary):
@@ -792,7 +796,8 @@ class Message:
             return failobj
         if isinstance(charset, TupleType):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+            pcharset = charset[0] or 'us-ascii'
+            charset = unicode(charset[2], pcharset).encode('us-ascii')
         # RFC 2046, $4.1.2 says charsets are not case sensitive
         return charset.lower()
 
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py
index 2b8b94fec49b..a409e16e9161 100644
--- a/Lib/email/Utils.py
+++ b/Lib/email/Utils.py
@@ -280,7 +280,7 @@ def decode_rfc2231(s):
     import urllib
     parts = s.split("'", 2)
     if len(parts) == 1:
-        return None, None, s
+        return None, None, urllib.unquote(s)
     charset, language, s = parts
     return charset, language, urllib.unquote(s)
 
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index b5d8d72eee7d..bfd610552931 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
 """A package for parsing, handling, and generating email messages.
 """
 
-__version__ = '2.5.3'
+__version__ = '2.5.4'
 
 __all__ = [
     'base64MIME',
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 7e630bf35c9c..a14199d9ac09 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2659,6 +2659,43 @@ Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOC
         self.assertEqual(msg.get_param('NAME'),
                          (None, None, 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm'))
 
+    def test_rfc2231_no_language_or_charset_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0="This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         'This is even more ***fun*** is it not.pdf')
+
+    def test_rfc2231_no_language_or_charset_in_boundary(self):
+        m = '''\
+Content-Type: multipart/alternative;
+\tboundary*0="This%20is%20even%20more%20";
+\tboundary*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tboundary*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_boundary(),
+                         'This is even more ***fun*** is it not.pdf')
+
+    def test_rfc2231_no_language_or_charset_in_charset(self):
+        # This is a nonsensical charset value, but tests the code anyway
+        m = '''\
+Content-Type: text/plain;
+\tcharset*0="This%20is%20even%20more%20";
+\tcharset*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tcharset*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_content_charset(),
+                         'this is even more ***fun*** is it not.pdf')
+
 
 
 def _testclasses():