Backport email 2.4.2 changes from Python 2.3.

author Barry Warsaw <barry@python.org>

Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)

committer Barry Warsaw <barry@python.org>

Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)
author Barry Warsaw <barry@python.org>
Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)
committer Barry Warsaw <barry@python.org>
Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)
diff --git a/Doc/lib/emailcharsets.tex b/Doc/lib/emailcharsets.tex

index d1ae72804c00446def772ea146c59fd0b07ea354..d654adace1d8efccd11632b9c0a1ad6818aa604f 100644 (file)
--- a/Doc/lib/emailcharsets.tex
+++ b/Doc/lib/emailcharsets.tex
@@ -23,10 +23,11 @@ Certain character sets must be encoded with quoted-printable or base64
  when used in email headers or bodies.  Certain character sets must be
  converted outright, and are not allowed in email.
  
-Optional \var{input_charset} is as described below.  After being alias
-normalized it is also used as a lookup into the registry of character
-sets to find out the header encoding, body encoding, and output
-conversion codec to be used for the character set.  For example, if
+Optional \var{input_charset} is as described below; it is always
+coerced to lower case.  After being alias-normalized, it is also used
+as a lookup key into the registry of character sets to find out the
+header encoding, body encoding, and output conversion codec to be used
+for the character set.  For example, if
  \var{input_charset} is \code{iso-8859-1}, then headers and bodies will
  be encoded using quoted-printable and no output conversion codec is
  necessary.  If \var{input_charset} is \code{euc-jp}, then headers will
diff --git a/Doc/lib/emailmessage.tex b/Doc/lib/emailmessage.tex

index bfd86647cbbdd284bd95e500f6a26501db0a3dfa..34c152db9ac8b79f228d2cd30324d788648b818d 100644 (file)
--- a/Doc/lib/emailmessage.tex
+++ b/Doc/lib/emailmessage.tex
@@ -443,8 +443,9 @@ have been present in the original \mailheader{Content-Type} header.
  
  \begin{methoddesc}[Message]{get_content_charset}{\optional{failobj}}
  Return the \code{charset} parameter of the \mailheader{Content-Type}
-header.  If there is no \mailheader{Content-Type} header, or if that
-header has no \code{charset} parameter, \var{failobj} is returned.
+header, coerced to lower case.  If there is no
+\mailheader{Content-Type} header, or if that header has no
+\code{charset} parameter, \var{failobj} is returned.
  
  Note that this method differs from \method{get_charset()} which
  returns the \class{Charset} instance for the default encoding of the
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py

index 9a7e5109764005eb435b6bdb3ceb53a578da90a7..67cc1ecb42d08b54affab577277a909a3c474c65 100644 (file)
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
@@ -177,13 +177,15 @@ class Charset:
                    this attribute will have the same value as the input_codec.
      """
      def __init__(self, input_charset=DEFAULT_CHARSET):
+        # RFC 2046, section 4.1.2 says charset names are not case-sensitive
+        input_charset = input_charset.lower()
          # Set the input charset after filtering through the aliases
          self.input_charset = ALIASES.get(input_charset, input_charset)
          # We can try to guess which encoding and conversion to use by the
          # charset_map dictionary.  Try that first, but let the user override
          # it.
          henc, benc, conv = CHARSETS.get(self.input_charset,
-                                        (SHORTEST, SHORTEST, None))
+                                        (SHORTEST, BASE64, None))
          # Set the attributes, allowing the arguments to override the default.
          self.header_encoding = henc
          self.body_encoding = benc
diff --git a/Lib/email/Message.py b/Lib/email/Message.py

index 87ab309885cca36245d0e8f66b1785558666165e..16ae12082eea94ed891cc3f077a862457a86bce5 100644 (file)
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -760,8 +760,9 @@ class Message:
      def get_content_charset(self, failobj=None):
          """Return the charset parameter of the Content-Type header.
  
-        If there is no Content-Type header, or if that header has no charset
-        parameter, failobj is returned.
+        The returned string is always coerced to lower case.  If there is no
+        Content-Type header, or if that header has no charset parameter,
+        failobj is returned.
          """
          missing = []
          charset = self.get_param('charset', missing)
@@ -769,8 +770,9 @@ class Message:
              return failobj
          if isinstance(charset, TupleType):
              # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            return unicode(charset[2], charset[0]).encode('us-ascii')
-        return charset
+            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+        # RFC 2046, section 4.1.2 says charset names are not case-sensitive
+        return charset.lower()
  
      def get_charsets(self, failobj=None):
          """Return a list containing the charset(s) used in this message.
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py

index 2dcf684150b2bf1a1bb33b1474c6086429f2057f..2945b0510f023432fd9bd99ea5571cb3cdf2d387 100644 (file)
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
  """A package for parsing, handling, and generating email messages.
  """
  
-__version__ = '2.4.1'
+__version__ = '2.4.2'
  
  __all__ = [
      'base64MIME',
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index 5bbb79416b597f917245ae908cbd50caa7c09e21..daf9e287a71ac10375a5bbad450abfdcb5129ac4 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1689,6 +1689,40 @@ class TestMiscellaneous(unittest.TestCase):
                         filename='foo\\wacky"name')
          eq(msg.get_filename(), 'foo\\wacky"name')
  
+    def test_get_body_encoding_with_bogus_charset(self):
+        charset = Charset('not a charset')
+        self.assertEqual(charset.get_body_encoding(), 'base64')
+
+    def test_get_body_encoding_with_uppercase_charset(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg['Content-Type'] = 'text/plain; charset=UTF-8'
+        eq(msg['content-type'], 'text/plain; charset=UTF-8')
+        charsets = msg.get_charsets()
+        eq(len(charsets), 1)
+        eq(charsets[0], 'utf-8')
+        charset = Charset(charsets[0])
+        eq(charset.get_body_encoding(), 'base64')
+        msg.set_payload('hello world', charset=charset)
+        eq(msg.get_payload(), 'hello world')
+        eq(msg['content-transfer-encoding'], 'base64')
+        # Try another one
+        msg = Message()
+        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
+        charsets = msg.get_charsets()
+        eq(len(charsets), 1)
+        eq(charsets[0], 'us-ascii')
+        charset = Charset(charsets[0])
+        eq(charset.get_body_encoding(), Encoders.encode_7or8bit)
+        msg.set_payload('hello world', charset=charset)
+        eq(msg.get_payload(), 'hello world')
+        eq(msg['content-transfer-encoding'], '7bit')
+
+    def test_charsets_case_insensitive(self):
+        lc = Charset('us-ascii')
+        uc = Charset('US-ASCII')
+        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
+
  
  \f
  # Test the iterator/generators
author	Barry Warsaw <barry@python.org>
	Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)
committer	Barry Warsaw <barry@python.org>
	Thu, 10 Oct 2002 19:10:45 +0000 (19:10 +0000)
Doc/lib/emailcharsets.tex		patch \| blob \| blame \| history
Doc/lib/emailmessage.tex		patch \| blob \| blame \| history
Lib/email/Charset.py		patch \| blob \| blame \| history
Lib/email/Message.py		patch \| blob \| blame \| history
Lib/email/__init__.py		patch \| blob \| blame \| history
Lib/email/test/test_email.py		patch \| blob \| blame \| history