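get_filename(), get_content_charset(): It's possible that the charset named in an RFC 2231 encoded parameter is unknown to Python (LookupError), or that the encoded text contains a character not in that charset (UnicodeError). Instead of raising, get_filename() now returns the undecoded text, and get_content_charset() falls back to the undecoded text and returns failobj if the result is not pure us-ascii.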

author Barry Warsaw <barry@python.org>

Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)

committer Barry Warsaw <barry@python.org>

Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)
author Barry Warsaw <barry@python.org>
Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)
committer Barry Warsaw <barry@python.org>
Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)
diff --git a/Lib/email/Message.py b/Lib/email/Message.py

index 5b76e850b8dd5e286a67cd4a235985d3e8fc09ab..13963301b85be71f9e319c4533b719fae64edd0a 100644 (file)
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -1,8 +1,7 @@
-# Copyright (C) 2001,2002 Python Software Foundation
-# Author: barry@zope.com (Barry Warsaw)
+# Copyright (C) 2001-2005 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
  
-"""Basic message object for the email package object model.
-"""
+"""Basic message object for the email package object model."""
  
  import re
  import uu
@@ -728,7 +727,13 @@ class Message:
          if isinstance(filename, TupleType):
              # It's an RFC 2231 encoded parameter
              newvalue = _unquotevalue(filename)
-            return unicode(newvalue[2], newvalue[0] or 'us-ascii')
+            try:
+                return unicode(newvalue[2], newvalue[0] or 'us-ascii')
+            # LookupError can get raised if the charset isn't known to Python.
+            # UnicodeError can get raised if the encoded text contains a
+            # character not in the charset.
+            except (LookupError, UnicodeError):
+                return newvalue[2]
          else:
              newvalue = _unquotevalue(filename.strip())
              return newvalue
@@ -815,7 +820,18 @@ class Message:
          if isinstance(charset, TupleType):
              # RFC 2231 encoded, so decode it, and it better end up as ascii.
              pcharset = charset[0] or 'us-ascii'
-            charset = unicode(charset[2], pcharset).encode('us-ascii')
+            try:
+                charset = unicode(charset[2], pcharset).encode('us-ascii')
+            # LookupError can get raised if the charset isn't known to Python.
+            # UnicodeError can get raised if the encoded text contains a
+            # character not in the charset.
+            except (LookupError, UnicodeError):
+                charset = charset[2]
+        # charset characters should be in us-ascii range
+        try:
+            charset = unicode(charset, 'us-ascii').encode('us-ascii')
+        except UnicodeError:
+            return failobj
          # RFC 2046, $4.1.2 says charsets are not case sensitive
          return charset.lower()
  
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py

index a18c90e2615233800d91a0c32eb0d89ed61be7ae..bc829c25cbca5d730fa4cacc09ef099066f66c66 100644 (file)
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -1,10 +1,10 @@
-# Copyright (C) 2001-2004 Python Software Foundation
+# Copyright (C) 2001-2005 Python Software Foundation
  # Author: barry@python.org (Barry Warsaw)
  
  """A package for parsing, handling, and generating email messages.
  """
  
-__version__ = '2.5.5'
+__version__ = '2.5.6'
  
  __all__ = [
      'base64MIME',
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index c69c25871ca9256484a33740ac00d4c1a7c1e76a..ad16eab91bb20d7b2771d337eba11d54e51dda85 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2001,2002,2003 Python Software Foundation
+# Copyright (C) 2001-2005 Python Software Foundation
  # email package unit tests
  
  import os
@@ -2758,6 +2758,50 @@ Content-Type: text/plain;
          self.assertEqual(msg.get_content_charset(),
                           'this is even more ***fun*** is it not.pdf')
  
+    def test_rfc2231_bad_encoding_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         'This is even more ***fun*** is it not.pdf')
+
+    def test_rfc2231_bad_encoding_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf%E2"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         'This is even more ***fun*** is it not.pdf\xe2')
+
  
  \f
  def _testclasses():
author	Barry Warsaw <barry@python.org>
	Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)
committer	Barry Warsaw <barry@python.org>
	Fri, 29 Apr 2005 12:12:02 +0000 (12:12 +0000)
Lib/email/Message.py		patch | blob | blame | history
Lib/email/__init__.py		patch | blob | blame | history
Lib/email/test/test_email.py		patch | blob | blame | history