From bf503905a7910194076c934e0e3ba3e10fa1654d Mon Sep 17 00:00:00 2001
From: Barry Warsaw <barry@python.org>
Date: Thu, 10 Oct 2002 19:10:45 +0000
Subject: [PATCH] Backport email 2.4.2 changes from Python 2.3.

---
 Doc/lib/emailcharsets.tex    |  9 +++++----
 Doc/lib/emailmessage.tex     |  5 +++--
 Lib/email/Charset.py         |  4 +++-
 Lib/email/Message.py         | 10 ++++++----
 Lib/email/__init__.py        |  2 +-
 Lib/email/test/test_email.py | 34 ++++++++++++++++++++++++++++++++++
 6 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/Doc/lib/emailcharsets.tex b/Doc/lib/emailcharsets.tex
index d1ae72804c00..d654adace1d8 100644
--- a/Doc/lib/emailcharsets.tex
+++ b/Doc/lib/emailcharsets.tex
@@ -23,10 +23,11 @@ Certain character sets must be encoded with quoted-printable or base64
 when used in email headers or bodies.  Certain character sets must be
 converted outright, and are not allowed in email.
 
-Optional \var{input_charset} is as described below.  After being alias
-normalized it is also used as a lookup into the registry of character
-sets to find out the header encoding, body encoding, and output
-conversion codec to be used for the character set.  For example, if
+Optional \var{input_charset} is as described below; it is always
+coerced to lower case.  After being alias normalized it is also used
+as a lookup into the registry of character sets to find out the header
+encoding, body encoding, and output conversion codec to be used for
+the character set.  For example, if
 \var{input_charset} is \code{iso-8859-1}, then headers and bodies will
 be encoded using quoted-printable and no output conversion codec is
 necessary.  If \var{input_charset} is \code{euc-jp}, then headers will
diff --git a/Doc/lib/emailmessage.tex b/Doc/lib/emailmessage.tex
index bfd86647cbbd..34c152db9ac8 100644
--- a/Doc/lib/emailmessage.tex
+++ b/Doc/lib/emailmessage.tex
@@ -443,8 +443,9 @@ have been present in the original \mailheader{Content-Type} header.
 
 \begin{methoddesc}[Message]{get_content_charset}{\optional{failobj}}
 Return the \code{charset} parameter of the \mailheader{Content-Type}
-header.  If there is no \mailheader{Content-Type} header, or if that
-header has no \code{charset} parameter, \var{failobj} is returned.
+header, coerced to lower case.  If there is no
+\mailheader{Content-Type} header, or if that header has no
+\code{charset} parameter, \var{failobj} is returned.
 
 Note that this method differs from \method{get_charset()} which
 returns the \class{Charset} instance for the default encoding of the
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py
index 9a7e51097640..67cc1ecb42d0 100644
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
@@ -177,13 +177,15 @@ class Charset:
                   this attribute will have the same value as the input_codec.
     """
     def __init__(self, input_charset=DEFAULT_CHARSET):
+        # RFC 2046, section 4.1.2 says charsets are not case sensitive
+        input_charset = input_charset.lower()
         # Set the input charset after filtering through the aliases
         self.input_charset = ALIASES.get(input_charset, input_charset)
         # We can try to guess which encoding and conversion to use by the
         # charset_map dictionary.  Try that first, but let the user override
         # it.
         henc, benc, conv = CHARSETS.get(self.input_charset,
-                                        (SHORTEST, SHORTEST, None))
+                                        (SHORTEST, BASE64, None))
         # Set the attributes, allowing the arguments to override the default.
         self.header_encoding = henc
         self.body_encoding = benc
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 87ab309885cc..16ae12082eea 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -760,8 +760,9 @@ class Message:
     def get_content_charset(self, failobj=None):
         """Return the charset parameter of the Content-Type header.
 
-        If there is no Content-Type header, or if that header has no charset
-        parameter, failobj is returned.
+        The returned string is always coerced to lower case.  If there is no
+        Content-Type header, or if that header has no charset parameter,
+        failobj is returned.
         """
         missing = []
         charset = self.get_param('charset', missing)
@@ -769,8 +770,9 @@ class Message:
             return failobj
         if isinstance(charset, TupleType):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            return unicode(charset[2], charset[0]).encode('us-ascii')
-        return charset
+            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+        # RFC 2046, section 4.1.2 says charsets are not case sensitive
+        return charset.lower()
 
     def get_charsets(self, failobj=None):
         """Return a list containing the charset(s) used in this message.
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index 2dcf684150b2..2945b0510f02 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
 """A package for parsing, handling, and generating email messages.
 """
 
-__version__ = '2.4.1'
+__version__ = '2.4.2'
 
 __all__ = [
     'base64MIME',
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 5bbb79416b59..daf9e287a71a 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1689,6 +1689,40 @@ class TestMiscellaneous(unittest.TestCase):
                        filename='foo\\wacky"name')
         eq(msg.get_filename(), 'foo\\wacky"name')
 
+    def test_get_body_encoding_with_bogus_charset(self):
+        charset = Charset('not a charset')
+        self.assertEqual(charset.get_body_encoding(), 'base64')
+
+    def test_get_body_encoding_with_uppercase_charset(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg['Content-Type'] = 'text/plain; charset=UTF-8'
+        eq(msg['content-type'], 'text/plain; charset=UTF-8')
+        charsets = msg.get_charsets()
+        eq(len(charsets), 1)
+        eq(charsets[0], 'utf-8')
+        charset = Charset(charsets[0])
+        eq(charset.get_body_encoding(), 'base64')
+        msg.set_payload('hello world', charset=charset)
+        eq(msg.get_payload(), 'hello world')
+        eq(msg['content-transfer-encoding'], 'base64')
+        # A quoted, upper-case US-ASCII charset must be lower-cased as well
+        msg = Message()
+        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
+        charsets = msg.get_charsets()
+        eq(len(charsets), 1)
+        eq(charsets[0], 'us-ascii')
+        charset = Charset(charsets[0])
+        eq(charset.get_body_encoding(), Encoders.encode_7or8bit)
+        msg.set_payload('hello world', charset=charset)
+        eq(msg.get_payload(), 'hello world')
+        eq(msg['content-transfer-encoding'], '7bit')
+
+    def test_charsets_case_insensitive(self):
+        lc = Charset('us-ascii')
+        uc = Charset('US-ASCII')
+        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
+
 
 
 # Test the iterator/generators
-- 
2.47.3