Backporting email 2.5 to Python 2.2 maint branch.

author Barry Warsaw <barry@python.org>

Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)

committer Barry Warsaw <barry@python.org>

Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)
author Barry Warsaw <barry@python.org>
Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)
committer Barry Warsaw <barry@python.org>
Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py

index b852245b00af024b46fe728d6a2a5d2f3b76812c..dd328e050152ee352f360171972b4f1cef6d3f7b 100644 (file)
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
@@ -35,6 +35,20 @@ CHARSETS = {
      # input        header enc  body enc output conv
      'iso-8859-1':  (QP,        QP,      None),
      'iso-8859-2':  (QP,        QP,      None),
+    'iso-8859-3':  (QP,        QP,      None),
+    'iso-8859-4':  (QP,        QP,      None),
+    # iso-8859-5 is Cyrillic, and not especially used
+    # iso-8859-6 is Arabic, also not particularly used
+    # iso-8859-7 is Greek, QP will not make it readable
+    # iso-8859-8 is Hebrew, QP will not make it readable
+    'iso-8859-9':  (QP,        QP,      None),
+    'iso-8859-10': (QP,        QP,      None),
+    # iso-8859-11 is Thai, QP will not make it readable
+    'iso-8859-13': (QP,        QP,      None),
+    'iso-8859-14': (QP,        QP,      None),
+    'iso-8859-15': (QP,        QP,      None),
+    'windows-1252':(QP,        QP,      None),
+    'viscii':      (QP,        QP,      None),
      'us-ascii':    (None,      None,    None),
      'big5':        (BASE64,    BASE64,  None),
      'gb2312':      (BASE64,    BASE64,  None),
@@ -52,6 +66,25 @@ CHARSETS = {
  ALIASES = {
      'latin_1': 'iso-8859-1',
      'latin-1': 'iso-8859-1',
+    'latin_2': 'iso-8859-2',
+    'latin-2': 'iso-8859-2',
+    'latin_3': 'iso-8859-3',
+    'latin-3': 'iso-8859-3',
+    'latin_4': 'iso-8859-4',
+    'latin-4': 'iso-8859-4',
+    'latin_5': 'iso-8859-9',
+    'latin-5': 'iso-8859-9',
+    'latin_6': 'iso-8859-10',
+    'latin-6': 'iso-8859-10',
+    'latin_7': 'iso-8859-13',
+    'latin-7': 'iso-8859-13',
+    'latin_8': 'iso-8859-14',
+    'latin-8': 'iso-8859-14',
+    'latin_9': 'iso-8859-15',
+    'latin-9': 'iso-8859-15',
+    'cp949':   'ks_c_5601-1987',
+    'euc_jp':  'euc-jp',
+    'euc_kr':  'euc-kr',
      'ascii':   'us-ascii',
      }
  
@@ -69,6 +102,10 @@ CODEC_MAP = {
      'euc-jp':      'japanese.euc-jp',
      'iso-2022-jp': 'japanese.iso-2022-jp',
      'shift_jis':   'japanese.shift_jis',
+    'euc-kr':      'korean.euc-kr',
+    'ks_c_5601-1987': 'korean.cp949',
+    'iso-2022-kr': 'korean.iso-2022-kr',
+    'johab':       'korean.johab',
      'gb2132':      'eucgb2312_cn',
      'big5':        'big5_tw',
      'utf-8':       'utf-8',
@@ -197,6 +234,8 @@ class Charset:
      def __str__(self):
          return self.input_charset.lower()
  
+    __repr__ = __str__
+
      def __eq__(self, other):
          return str(self) == str(other).lower()
  
@@ -321,14 +360,14 @@ class Charset:
          if self.header_encoding == BASE64:
              return email.base64MIME.header_encode(s, cset)
          elif self.header_encoding == QP:
-            return email.quopriMIME.header_encode(s, cset)
+            return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
          elif self.header_encoding == SHORTEST:
              lenb64 = email.base64MIME.base64_len(s)
              lenqp = email.quopriMIME.header_quopri_len(s)
              if lenb64 < lenqp:
                  return email.base64MIME.header_encode(s, cset)
              else:
-                return email.quopriMIME.header_encode(s, cset)
+                return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
          else:
              return s
  
@@ -348,7 +387,7 @@ class Charset:
          # 7bit/8bit encodings return the string unchanged (module conversions)
          if self.body_encoding is BASE64:
              return email.base64MIME.body_encode(s)
-        elif self.header_encoding is QP:
+        elif self.body_encoding is QP:
              return email.quopriMIME.body_encode(s)
          else:
              return s
diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py

index 58e2f91d622a4c1ef07baecfa50fcbc822abe599..9cce51c40b557b0ce81509fc3a274d6ee61732be 100644 (file)
--- a/Lib/email/Generator.py
+++ b/Lib/email/Generator.py
@@ -4,14 +4,16 @@
  """Classes to generate plain text from a message object tree.
  """
  
-import time
  import re
+import time
+import locale
  import random
  
  from types import ListType, StringType
  from cStringIO import StringIO
  
  from email.Header import Header
+from email.Parser import NLCRE
  
  try:
      from email._compat22 import _isstring
@@ -159,44 +161,29 @@ class Generator:
  
      def _write_headers(self, msg):
          for h, v in msg.items():
-            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
-            # characters wide, so we're well within our rights to split long
-            # headers.
-            text = '%s: %s' % (h, v)
-            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
-                text = self._split_header(text)
-            print >> self._fp, text
+            print >> self._fp, '%s:' % h,
+            if self.__maxheaderlen == 0:
+                # Explicit no-wrapping
+                print >> self._fp, v
+            elif isinstance(v, Header):
+                # Header instances know what to do
+                print >> self._fp, v.encode()
+            elif _is8bitstring(v):
+                # If we have raw 8bit data in a byte string, we have no idea
+                # what the encoding is.  There is no safe way to split this
+                # string.  If it's ascii-subset, then we could do a normal
+                # ascii split, but if it's multibyte then we could break the
+                # string.  There's no way to know so the least harm seems to
+                # be to not split the string and risk it being too long.
+                print >> self._fp, v
+            else:
+                # Header's got lots of smarts, so use it.
+                print >> self._fp, Header(
+                    v, maxlinelen=self.__maxheaderlen,
+                    header_name=h, continuation_ws='\t').encode()
          # A blank line always separates headers from body
          print >> self._fp
  
-    def _split_header(self, text):
-        maxheaderlen = self.__maxheaderlen
-        # Find out whether any lines in the header are really longer than
-        # maxheaderlen characters wide.  There could be continuation lines
-        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
-        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
-        for line in lines:
-            if len(line) > maxheaderlen:
-                break
-        else:
-            # No line was actually longer than maxheaderlen characters, so
-            # just return the original unchanged.
-            return text
-        # If we have raw 8bit data in a byte string, we have no idea what the
-        # encoding is.  I think there is no safe way to split this string.  If
-        # it's ascii-subset, then we could do a normal ascii split, but if
-        # it's multibyte then we could break the string.  There's no way to
-        # know so the least harm seems to be to not split the string and risk
-        # it being too long.
-        if _is8bitstring(text):
-            return text
-        # The `text' argument already has the field name prepended, so don't
-        # provide it here or the first line will get folded too short.
-        h = Header(text, maxlinelen=maxheaderlen,
-                   # For backwards compatibility, we use a hard tab here
-                   continuation_ws='\t')
-        return h.encode()
-
      #
      # Handlers for writing types and subtypes
      #
@@ -258,6 +245,14 @@ class Generator:
          # Write out any preamble
          if msg.preamble is not None:
              self._fp.write(msg.preamble)
+            # If preamble is the empty string, the length of the split will be
+            # 1, but the last element will be the empty string.  If it's
+            # anything else but does not end in a line separator, the length
+            # will be > 1 and not end in an empty string.  We need to
+            # guarantee a newline after the preamble, but don't add too many.
+            plines = NLCRE.split(msg.preamble)
+            if plines <> [''] and plines[-1] <> '':
+                self._fp.write('\n')
          # First boundary is a bit different; it doesn't have a leading extra
          # newline.
          print >> self._fp, '--' + boundary
@@ -364,7 +359,8 @@ class DecodedGenerator(Generator):
  def _make_boundary(text=None):
      # Craft a random boundary.  If text is given, ensure that the chosen
      # boundary doesn't appear in the text.
-    boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
+    dp = locale.localeconv().get('decimal_point', '.')
+    boundary = ('=' * 15) + repr(random.random()).split(dp)[1] + '=='
      if text is None:
          return boundary
      b = boundary
diff --git a/Lib/email/Header.py b/Lib/email/Header.py

index 0ceacc7bf309feeacf0fefbcec0367aa227c5bef..624e7c445b91007f552d5803964577c77d89d9b8 100644 (file)
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -4,10 +4,12 @@
  """Header encoding and decoding functionality."""
  
  import re
+import binascii
  from types import StringType, UnicodeType
  
  import email.quopriMIME
  import email.base64MIME
+from email.Errors import HeaderParseError
  from email.Charset import Charset
  
  try:
@@ -25,8 +27,11 @@ except NameError:
  CRLFSPACE = '\r\n '
  CRLF = '\r\n'
  NL = '\n'
+SPACE = ' '
+USPACE = u' '
  SPACE8 = ' ' * 8
  EMPTYSTRING = ''
+UEMPTYSTRING = u''
  
  MAXLINELEN = 76
  
@@ -47,6 +52,13 @@ ecre = re.compile(r'''
    \?=                   # literal ?=
    ''', re.VERBOSE | re.IGNORECASE)
  
+pcre = re.compile('([,;])')
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822.  Character range is from exclamation mark to tilde.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
  
  \f
  # Helpers
@@ -61,6 +73,9 @@ def decode_header(header):
      decoded parts of the header.  Charset is None for non-encoded parts of the
      header, otherwise a lower-case string containing the name of the character
      set specified in the encoded string.
+
+    An email.Errors.HeaderParseError may be raised when certain decoding errors
+    occur (e.g. a base64 decoding exception).
      """
      # If no encoding, just return the header
      header = str(header)
@@ -79,18 +94,24 @@ def decode_header(header):
              if unenc:
                  # Should we continue a long line?
                  if decoded and decoded[-1][1] is None:
-                    decoded[-1] = (decoded[-1][0] + dec, None)
+                    decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
                  else:
                      decoded.append((unenc, None))
              if parts:
                  charset, encoding = [s.lower() for s in parts[0:2]]
                  encoded = parts[2]
-                dec = ''
+                dec = None
                  if encoding == 'q':
                      dec = email.quopriMIME.header_decode(encoded)
                  elif encoding == 'b':
-                    dec = email.base64MIME.decode(encoded)
-                else:
+                    try:
+                        dec = email.base64MIME.decode(encoded)
+                    except binascii.Error:
+                        # Turn this into a higher level exception.  BAW: Right
+                        # now we throw the lower level exception away but
+                        # when/if we get exception chaining, we'll preserve it.
+                        raise HeaderParseError
+                if dec is None:
                      dec = encoded
  
                  if decoded and decoded[-1][1] == charset:
@@ -126,8 +147,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
  
  \f
  class Header:
-    def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
-                 continuation_ws=' '):
+    def __init__(self, s=None, charset=None,
+                 maxlinelen=None, header_name=None,
+                 continuation_ws=' ', errors='strict'):
          """Create a MIME-compliant header that can contain many character sets.
  
          Optional s is the initial header value.  If None, the initial header
@@ -150,6 +172,8 @@ class Header:
          continuation_ws must be RFC 2822 compliant folding whitespace (usually
          either a space or a hard tab) which will be prepended to continuation
          lines.
+
+        errors is passed through to the .append() call.
          """
          if charset is None:
              charset = USASCII
@@ -161,7 +185,7 @@ class Header:
          # BAW: I believe `chunks' and `maxlinelen' should be non-public.
          self._chunks = []
          if s is not None:
-            self.append(s, charset)
+            self.append(s, charset, errors)
          if maxlinelen is None:
              maxlinelen = MAXLINELEN
          if header_name is None:
@@ -182,9 +206,24 @@ class Header:
  
      def __unicode__(self):
          """Helper for the built-in unicode function."""
-        # charset item is a Charset instance so we need to stringify it.
-        uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
-        return u''.join(uchunks)
+        uchunks = []
+        lastcs = None
+        for s, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset.  Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if uchunks:
+                if lastcs is not None:
+                    if nextcs is None or nextcs == 'us-ascii':
+                        uchunks.append(USPACE)
+                        nextcs = None
+                elif nextcs is not None and nextcs <> 'us-ascii':
+                    uchunks.append(USPACE)
+            lastcs = nextcs
+            uchunks.append(unicode(s, str(charset)))
+        return UEMPTYSTRING.join(uchunks)
  
      # Rich comparison operators for equality only.  BAW: does it make sense to
      # have or explicitly disable <, <=, >, >= operators?
@@ -196,7 +235,7 @@ class Header:
      def __ne__(self, other):
          return not self == other
  
-    def append(self, s, charset=None):
+    def append(self, s, charset=None, errors='strict'):
          """Append a string to the MIME header.
  
          Optional charset, if given, should be a Charset instance or the name
@@ -213,6 +252,9 @@ class Header:
          using RFC 2047 rules, the Unicode string will be encoded using the
          following charsets in order: us-ascii, the charset hint, utf-8.  The
          first character set not to provoke a UnicodeError is used.
+
+        Optional `errors' is passed as the third argument to any unicode() or
+        ustr.encode() call.
          """
          if charset is None:
              charset = self._charset
@@ -227,12 +269,12 @@ class Header:
                  # Possibly raise UnicodeError if the byte string can't be
                  # converted to a unicode with the input codec of the charset.
                  incodec = charset.input_codec or 'us-ascii'
-                ustr = unicode(s, incodec)
+                ustr = unicode(s, incodec, errors)
                  # Now make sure that the unicode could be converted back to a
                  # byte string with the output codec, which may be different
                  # than the iput coded.  Still, use the original byte string.
                  outcodec = charset.output_codec or 'us-ascii'
-                ustr.encode(outcodec)
+                ustr.encode(outcodec, errors)
              elif isinstance(s, UnicodeType):
                  # Now we have to be sure the unicode string can be converted
                  # to a byte string with a reasonable output codec.  We want to
@@ -240,7 +282,7 @@ class Header:
                  for charset in USASCII, charset, UTF8:
                      try:
                          outcodec = charset.output_codec or 'us-ascii'
-                        s = s.encode(outcodec)
+                        s = s.encode(outcodec, errors)
                          break
                      except UnicodeError:
                          pass
@@ -248,13 +290,13 @@ class Header:
                      assert False, 'utf-8 conversion failed'
          self._chunks.append((s, charset))
  
-    def _split(self, s, charset, firstline=False):
+    def _split(self, s, charset, maxlinelen, splitchars):
          # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
-        encoded = charset.from_splittable(splittable)
+        encoded = charset.from_splittable(splittable, True)
          elen = charset.encoded_header_len(encoded)
-
-        if elen <= self._maxlinelen:
+        # If the line's encoded length fits, just return it
+        if elen <= maxlinelen:
              return [(encoded, charset)]
          # If we have undetermined raw 8bit characters sitting in a byte
          # string, we really don't know what the right thing to do is.  We
@@ -262,7 +304,7 @@ class Header:
          # could break if we split it between pairs.  The least harm seems to
          # be to not split the header at all, but that means they could go out
          # longer than maxlinelen.
-        elif charset == '8bit':
+        if charset == '8bit':
              return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
          # do is be faithful to RFC 2822's recommendation that ($2.2.3):
@@ -275,101 +317,31 @@ class Header:
          # For now, I can only imagine doing this when the charset is us-ascii,
          # although it's possible that other charsets may also benefit from the
          # higher-level syntactic breaks.
-        #
          elif charset == 'us-ascii':
-            return self._ascii_split(s, charset, firstline)
+            return self._split_ascii(s, charset, maxlinelen, splitchars)
          # BAW: should we use encoded?
          elif elen == len(s):
              # We can split on _maxlinelen boundaries because we know that the
              # encoding won't change the size of the string
-            splitpnt = self._maxlinelen
+            splitpnt = maxlinelen
              first = charset.from_splittable(splittable[:splitpnt], False)
              last = charset.from_splittable(splittable[splitpnt:], False)
          else:
-            # Divide and conquer.
-            halfway = _floordiv(len(splittable), 2)
-            first = charset.from_splittable(splittable[:halfway], False)
-            last = charset.from_splittable(splittable[halfway:], False)
-        # Do the split
-        return self._split(first, charset, firstline) + \
-               self._split(last, charset)
-
-    def _ascii_split(self, s, charset, firstline):
-        # Attempt to split the line at the highest-level syntactic break
-        # possible.  Note that we don't have a lot of smarts about field
-        # syntax; we just try to break on semi-colons, then whitespace.
-        rtn = []
-        lines = s.splitlines()
-        while lines:
-            line = lines.pop(0)
-            if firstline:
-                maxlinelen = self._firstlinelen
-                firstline = False
-            else:
-                #line = line.lstrip()
-                maxlinelen = self._maxlinelen
-            # Short lines can remain unchanged
-            if len(line.replace('\t', SPACE8)) <= maxlinelen:
-                rtn.append(line)
-            else:
-                oldlen = len(line)
-                # Try to break the line on semicolons, but if that doesn't
-                # work, try to split on folding whitespace.
-                while len(line) > maxlinelen:
-                    i = line.rfind(';', 0, maxlinelen)
-                    if i < 0:
-                        break
-                    rtn.append(line[:i] + ';')
-                    line = line[i+1:]
-                # Is the remaining stuff still longer than maxlinelen?
-                if len(line) <= maxlinelen:
-                    # Splitting on semis worked
-                    rtn.append(line)
-                    continue
-                # Splitting on semis didn't finish the job.  If it did any
-                # work at all, stick the remaining junk on the front of the
-                # `lines' sequence and let the next pass do its thing.
-                if len(line) <> oldlen:
-                    lines.insert(0, line)
-                    continue
-                # Otherwise, splitting on semis didn't help at all.
-                parts = re.split(r'(\s+)', line)
-                if len(parts) == 1 or (len(parts) == 3 and
-                                       parts[0].endswith(':')):
-                    # This line can't be split on whitespace.  There's now
-                    # little we can do to get this into maxlinelen.  BAW:
-                    # We're still potentially breaking the RFC by possibly
-                    # allowing lines longer than the absolute maximum of 998
-                    # characters.  For now, let it slide.
-                    #
-                    # len(parts) will be 1 if this line has no `Field: '
-                    # prefix, otherwise it will be len(3).
-                    rtn.append(line)
-                    continue
-                # There is whitespace we can split on.
-                first = parts.pop(0)
-                sublines = [first]
-                acc = len(first)
-                while parts:
-                    len0 = len(parts[0])
-                    len1 = len(parts[1])
-                    if acc + len0 + len1 <= maxlinelen:
-                        sublines.append(parts.pop(0))
-                        sublines.append(parts.pop(0))
-                        acc += len0 + len1
-                    else:
-                        # Split it here, but don't forget to ignore the
-                        # next whitespace-only part
-                        if first <> '':
-                            rtn.append(EMPTYSTRING.join(sublines))
-                        del parts[0]
-                        first = parts.pop(0)
-                        sublines = [first]
-                        acc = len(first)
-                rtn.append(EMPTYSTRING.join(sublines))
-        return [(chunk, charset) for chunk in rtn]
-
-    def _encode_chunks(self, newchunks):
+            # Binary search for split point
+            first, last = _binsplit(splittable, charset, maxlinelen)
+        # first is of the proper length so just wrap it in the appropriate
+        # chrome.  last must be recursively split.
+        fsplittable = charset.to_splittable(first)
+        fencoded = charset.from_splittable(fsplittable, True)
+        chunk = [(fencoded, charset)]
+        return chunk + self._split(last, charset, self._maxlinelen, splitchars)
+
+    def _split_ascii(self, s, charset, firstlen, splitchars):
+        chunks = _split_ascii(s, firstlen, self._maxlinelen,
+                              self._continuation_ws, splitchars)
+        return zip(chunks, [charset]*len(chunks))
+
+    def _encode_chunks(self, newchunks, maxlinelen):
          # MIME-encode a header with many different charsets and/or encodings.
          #
          # Given a list of pairs (string, charset), return a MIME-encoded
@@ -387,19 +359,24 @@ class Header:
          #
          # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
          #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
-        #
          chunks = []
          for header, charset in newchunks:
+            if not header:
+                continue
              if charset is None or charset.header_encoding is None:
-                # There's no encoding for this chunk's charsets
-                _max_append(chunks, header, self._maxlinelen)
+                s = header
+            else:
+                s = charset.header_encode(header)
+            # Don't add more folding whitespace than necessary
+            if chunks and chunks[-1].endswith(' '):
+                extra = ''
              else:
-                _max_append(chunks, charset.header_encode(header),
-                            self._maxlinelen, ' ')
+                extra = ' '
+            _max_append(chunks, s, maxlinelen, extra)
          joiner = NL + self._continuation_ws
          return joiner.join(chunks)
  
-    def encode(self):
+    def encode(self, splitchars=';, '):
          """Encode a message header into an RFC-compliant format.
  
          There are many issues involved in converting a given string for use in
@@ -416,8 +393,123 @@ class Header:
  
          If the given charset is not known or an error occurs during
          conversion, this function will return the header untouched.
+
+        Optional splitchars is a string containing characters to split long
+        ASCII lines on, in rough support of RFC 2822's `highest level
+        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.
          """
          newchunks = []
+        maxlinelen = self._firstlinelen
+        lastlen = 0
          for s, charset in self._chunks:
-            newchunks += self._split(s, charset, True)
-        return self._encode_chunks(newchunks)
+            # The first bit of the next chunk should be just long enough to
+            # fill the next line.  Don't forget the space separating the
+            # encoded words.
+            targetlen = maxlinelen - lastlen - 1
+            if targetlen < charset.encoded_header_len(''):
+                # Stick it on the next line
+                targetlen = maxlinelen
+            newchunks += self._split(s, charset, targetlen, splitchars)
+            lastchunk, lastcharset = newchunks[-1]
+            lastlen = lastcharset.encoded_header_len(lastchunk)
+        return self._encode_chunks(newchunks, maxlinelen)
+
+
+\f
+def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
+    lines = []
+    maxlen = firstlen
+    for line in s.splitlines():
+        # Ignore any leading whitespace (i.e. continuation whitespace) already
+        # on the line, since we'll be adding our own.
+        line = line.lstrip()
+        if len(line) < maxlen:
+            lines.append(line)
+            maxlen = restlen
+            continue
+        # Attempt to split the line at the highest-level syntactic break
+        # possible.  Note that we don't have a lot of smarts about field
+        # syntax; we just try to break on semi-colons, then commas, then
+        # whitespace.
+        for ch in splitchars:
+            if line.find(ch) >= 0:
+                break
+        else:
+            # There's nothing useful to split the line on, not even spaces, so
+            # just append this line unchanged
+            lines.append(line)
+            maxlen = restlen
+            continue
+        # Now split the line on the character plus trailing whitespace
+        cre = re.compile(r'%s\s*' % ch)
+        if ch in ';,':
+            eol = ch
+        else:
+            eol = ''
+        joiner = eol + ' '
+        joinlen = len(joiner)
+        wslen = len(continuation_ws.replace('\t', SPACE8))
+        this = []
+        linelen = 0
+        for part in cre.split(line):
+            curlen = linelen + max(0, len(this)-1) * joinlen
+            partlen = len(part)
+            onfirstline = not lines
+            # We don't want to split after the field name, if we're on the
+            # first line and the field name is present in the header string.
+            if ch == ' ' and onfirstline and \
+                   len(this) == 1 and fcre.match(this[0]):
+                this.append(part)
+                linelen += partlen
+            elif curlen + partlen > maxlen:
+                if this:
+                    lines.append(joiner.join(this) + eol)
+                # If this part is longer than maxlen and we aren't already
+                # splitting on whitespace, try to recursively split this line
+                # on whitespace.
+                if partlen > maxlen and ch <> ' ':
+                    subl = _split_ascii(part, maxlen, restlen,
+                                        continuation_ws, ' ')
+                    lines.extend(subl[:-1])
+                    this = [subl[-1]]
+                else:
+                    this = [part]
+                linelen = wslen + len(this[-1])
+                maxlen = restlen
+            else:
+                this.append(part)
+                linelen += partlen
+        # Put any left over parts on a line by themselves
+        if this:
+            lines.append(joiner.join(this))
+    return lines
+
+
+\f
+def _binsplit(splittable, charset, maxlinelen):
+    i = 0
+    j = len(splittable)
+    while i < j:
+        # Invariants:
+        # 1. splittable[:k] fits for all k <= i (note that we *assume*,
+        #    at the start, that splittable[:0] fits).
+        # 2. splittable[:k] does not fit for any k > j (at the start,
+        #    this means we shouldn't look at any k > len(splittable)).
+        # 3. We don't know about splittable[:k] for k in i+1..j.
+        # 4. We want to set i to the largest k that fits, with i <= k <= j.
+        #
+        m = (i+j+1) >> 1  # ceiling((i+j)/2); i < m <= j
+        chunk = charset.from_splittable(splittable[:m], True)
+        chunklen = charset.encoded_header_len(chunk)
+        if chunklen <= maxlinelen:
+            # m is acceptable, so is a new lower bound.
+            i = m
+        else:
+            # m is not acceptable, so final i must be < m.
+            j = m - 1
+    # i == j.  Invariant #1 implies that splittable[:i] fits, and
+    # invariant #2 implies that splittable[:i+1] does not fit, so i
+    # is what we're looking for.
+    first = charset.from_splittable(splittable[:i], False)
+    last  = charset.from_splittable(splittable[i:], False)
+    return first, last
diff --git a/Lib/email/MIMEText.py b/Lib/email/MIMEText.py

index d91b93df3d1814ff6d3ffadf3a63c7ddfc9ab883..d049ad9fd80bf92fdb795adbbc5a42c94faebf83 100644 (file)
--- a/Lib/email/MIMEText.py
+++ b/Lib/email/MIMEText.py
@@ -17,8 +17,7 @@ class MIMEText(MIMENonMultipart):
                   _encoder=None):
          """Create a text/* type MIME document.
  
-        _text is the string for this message object.  If the text does not end
-        in a newline, one is added.
+        _text is the string for this message object.
  
          _subtype is the MIME sub content type, defaulting to "plain".
  
@@ -35,8 +34,6 @@ class MIMEText(MIMENonMultipart):
          """
          MIMENonMultipart.__init__(self, 'text', _subtype,
                                    **{'charset': _charset})
-        if _text and not _text.endswith('\n'):
-            _text += '\n'
          self.set_payload(_text, _charset)
          if _encoder is not None:
              warnings.warn('_encoder argument is obsolete.',
diff --git a/Lib/email/Message.py b/Lib/email/Message.py

index 16ae12082eea94ed891cc3f077a862457a86bce5..66f8640eb1f7459434f59de7051da0deb9a94b0f 100644 (file)
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -5,13 +5,15 @@
  """
  
  import re
+import uu
+import binascii
  import warnings
  from cStringIO import StringIO
  from types import ListType, TupleType, StringType
  
  # Intrapackage imports
-from email import Errors
  from email import Utils
+from email import Errors
  from email import Charset
  
  SEMISPACE = '; '
@@ -164,14 +166,18 @@ class Message:
          the list object, you modify the message's payload in place.  Optional
          i returns that index into the payload.
  
-        Optional decode is a flag (defaulting to False) indicating whether the
-        payload should be decoded or not, according to the
-        Content-Transfer-Encoding header.  When True and the message is not a
-        multipart, the payload will be decoded if this header's value is
-        `quoted-printable' or `base64'.  If some other encoding is used, or
-        the header is missing, the payload is returned as-is (undecoded).  If
-        the message is a multipart and the decode flag is True, then None is
-        returned.
+        Optional decode is a flag indicating whether the payload should be
+        decoded or not, according to the Content-Transfer-Encoding header
+        (default is False).
+
+        When True and the message is not a multipart, the payload will be
+        decoded if this header's value is `quoted-printable' or `base64'.  If
+        some other encoding is used, or the header is missing, or if the
+        payload has bogus data (i.e. bogus base64 or uuencoded data), the
+        payload is returned as-is.
+
+        If the message is a multipart and the decode flag is True, then None
+        is returned.
          """
          if i is None:
              payload = self._payload
@@ -182,11 +188,23 @@ class Message:
          if decode:
              if self.is_multipart():
                  return None
-            cte = self.get('content-transfer-encoding', '')
-            if cte.lower() == 'quoted-printable':
+            cte = self.get('content-transfer-encoding', '').lower()
+            if cte == 'quoted-printable':
                  return Utils._qdecode(payload)
-            elif cte.lower() == 'base64':
-                return Utils._bdecode(payload)
+            elif cte == 'base64':
+                try:
+                    return Utils._bdecode(payload)
+                except binascii.Error:
+                    # Incorrect padding
+                    return payload
+            elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+                sfp = StringIO()
+                try:
+                    uu.decode(StringIO(payload+'\n'), sfp)
+                    payload = sfp.getvalue()
+                except uu.Error:
+                    # Some decoding problem
+                    return payload
          # Everything else, including encodings with 8bit or 7bit are returned
          # unchanged.
          return payload
diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py

index 5fea3c398630bccf460a0fa1b1bfc98705697218..09fac4552f9379f8a32d57dd74329527c0478037 100644 (file)
--- a/Lib/email/Parser.py
+++ b/Lib/email/Parser.py
@@ -20,7 +20,7 @@ except NameError:
      True = 1
      False = 0
  
-nlcre = re.compile('\r\n|\r|\n')
+NLCRE = re.compile('\r\n|\r|\n')
  
  
  \f
@@ -59,9 +59,9 @@ class Parser:
          meaning it parses the entire contents of the file.
          """
          root = self._class()
-        self._parseheaders(root, fp)
+        firstbodyline = self._parseheaders(root, fp)
          if not headersonly:
-            self._parsebody(root, fp)
+            self._parsebody(root, fp, firstbodyline)
          return root
  
      def parsestr(self, text, headersonly=False):
@@ -80,6 +80,7 @@ class Parser:
          lastheader = ''
          lastvalue = []
          lineno = 0
+        firstbodyline = None
          while True:
              # Don't strip the line before we test for the end condition,
              # because whitespace-only header lines are RFC compliant
@@ -120,13 +121,16 @@ class Parser:
              if i < 0:
                  if self._strict:
                      raise Errors.HeaderParseError(
-                        "Not a header, not a continuation: ``%s''"%line)
+                        "Not a header, not a continuation: ``%s''" % line)
                  elif lineno == 1 and line.startswith('--'):
                      # allow through duplicate boundary tags.
                      continue
                  else:
-                    raise Errors.HeaderParseError(
-                        "Not a header, not a continuation: ``%s''"%line)
+                    # There was no separating blank line as mandated by RFC
+                    # 2822, but we're in non-strict mode.  So just offer up
+                    # this current line as the first body line.
+                    firstbodyline = line
+                    break
              if lastheader:
                  container[lastheader] = NL.join(lastvalue)
              lastheader = line[:i]
@@ -134,8 +138,9 @@ class Parser:
          # Make sure we retain the last header
          if lastheader:
              container[lastheader] = NL.join(lastvalue)
+        return firstbodyline
  
-    def _parsebody(self, container, fp):
+    def _parsebody(self, container, fp, firstbodyline=None):
          # Parse the body, but first split the payload on the content-type
          # boundary if present.
          boundary = container.get_boundary()
@@ -152,6 +157,8 @@ class Parser:
              # boundary.
              separator = '--' + boundary
              payload = fp.read()
+            if firstbodyline is not None:
+                payload = firstbodyline + '\n' + payload
              # We use an RE here because boundaries can have trailing
              # whitespace.
              mo = re.search(
@@ -169,7 +176,7 @@ class Parser:
                  preamble = payload[0:start]
              # Find out what kind of line endings we're using
              start += len(mo.group('sep')) + len(mo.group('ws'))
-            mo = nlcre.search(payload, start)
+            mo = NLCRE.search(payload, start)
              if mo:
                  start += len(mo.group(0))
              # We create a compiled regexp first because we need to be able to
@@ -221,9 +228,13 @@ class Parser:
                          # msgobj in this case is the "message/rfc822" container
                          msgobj = self.parsestr(parthdrs, headersonly=1)
                      # while submsgobj is the message itself
-                    submsgobj = self.parsestr(part)
-                    msgobj.attach(submsgobj)
                      msgobj.set_default_type('message/rfc822')
+                    maintype = msgobj.get_content_maintype()
+                    if maintype in ('message', 'multipart'):
+                        submsgobj = self.parsestr(part)
+                        msgobj.attach(submsgobj)
+                    else:
+                        msgobj.set_payload(part)
                  else:
                      msgobj = self.parsestr(part)
                  container.preamble = preamble
@@ -256,7 +267,10 @@ class Parser:
                  self._parsebody(msg, fp)
              container.attach(msg)
          else:
-            container.set_payload(fp.read())
+            text = fp.read()
+            if firstbodyline is not None:
+                text = firstbodyline + '\n' + text
+            container.set_payload(text)
  
  
  \f
@@ -270,6 +284,9 @@ class HeaderParser(Parser):
      Parsing with this subclass can be considerably faster if all you're
      interested in is the message headers.
      """
-    def _parsebody(self, container, fp):
+    def _parsebody(self, container, fp, firstbodyline=None):
          # Consume but do not parse, the body
-        container.set_payload(fp.read())
+        text = fp.read()
+        if firstbodyline is not None:
+            text = firstbodyline + '\n' + text
+        container.set_payload(text)
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py

index b619c6b798517ecedbae35168b00c8fb8edb7d86..2b8b94fec49b7cd0ba2b2af4c9d7e27fcb850107 100644 (file)
--- a/Lib/email/Utils.py
+++ b/Lib/email/Utils.py
@@ -13,13 +13,13 @@ import warnings
  from cStringIO import StringIO
  from types import ListType
  
-from rfc822 import quote
-from rfc822 import AddressList as _AddressList
-from rfc822 import mktime_tz
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
  
  # We need workarounds for bugs in these methods in older Pythons (see below)
-from rfc822 import parsedate as _parsedate
-from rfc822 import parsedate_tz as _parsedate_tz
+from email._parseaddr import parsedate as _parsedate
+from email._parseaddr import parsedate_tz as _parsedate_tz
  
  try:
      True, False
@@ -54,8 +54,8 @@ EMPTYSTRING = ''
  UEMPTYSTRING = u''
  CRLF = '\r\n'
  
-specialsre = re.compile(r'[][\()<>@,:;".]')
-escapesre = re.compile(r'[][\()"]')
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[][\\()"]')
  
  
  \f
@@ -66,8 +66,6 @@ def _identity(s):
  
  
  def _bdecode(s):
-    if not s:
-        return s
      # We can't quite use base64.encodestring() since it tacks on a "courtesy
      # newline".  Blech!
      if not s:
@@ -280,9 +278,11 @@ def unquote(str):
  def decode_rfc2231(s):
      """Decode string according to RFC 2231"""
      import urllib
-    charset, language, s = s.split("'", 2)
-    s = urllib.unquote(s)
-    return charset, language, s
+    parts = s.split("'", 2)
+    if len(parts) == 1:
+        return None, None, s
+    charset, language, s = parts
+    return charset, language, urllib.unquote(s)
  
  
  def encode_rfc2231(s, charset=None, language=None):
@@ -335,6 +335,6 @@ def decode_params(params):
              for num, continuation in continuations:
                  value.append(continuation)
              charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
-            new_params.append((name,
-                               (charset, language, '"%s"' % quote(value))))
+            new_params.append(
+                (name, (charset, language, '"%s"' % quote(value))))
      return new_params
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py

index b784da8247fa9df50484e2e21acc51e87b44d4bd..71b5b5d08b58158ff2f00a2bcee9d00c04e326f6 100644 (file)
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
  """A package for parsing, handling, and generating email messages.
  """
  
-__version__ = '2.4.3'
+__version__ = '2.5'
  
  __all__ = [
      'base64MIME',
diff --git a/Lib/email/_compat21.py b/Lib/email/_compat21.py

index de8c44753de43601210714d835b08051ac21a9ad..0e0b3d07652b4a7ecece63f2d2253e9c30c71c47 100644 (file)
--- a/Lib/email/_compat21.py
+++ b/Lib/email/_compat21.py
@@ -7,6 +7,9 @@
  from cStringIO import StringIO
  from types import StringType, UnicodeType
  
+False = 0
+True = 1
+
  
  \f
  # This function will become a method of the Message class
@@ -31,17 +34,20 @@ def _floordiv(i, j):
  
  
  def _isstring(obj):
-    return isinstance(obj, StringType) or isinstance(obj, UnicodeType)    
+    return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
  
  
  \f
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
-def body_line_iterator(msg):
-    """Iterate over the parts, returning string payloads line-by-line."""
+def body_line_iterator(msg, decode=False):
+    """Iterate over the parts, returning string payloads line-by-line.
+
+    Optional decode (default False) is passed through to .get_payload().
+    """
      lines = []
      for subpart in msg.walk():
-        payload = subpart.get_payload()
+        payload = subpart.get_payload(decode=decode)
          if _isstring(payload):
              for line in StringIO(payload).readlines():
                  lines.append(line)
diff --git a/Lib/email/_compat22.py b/Lib/email/_compat22.py

index a05451f25d9637a40cdd63c0bc27edc79455a21a..ec2d2f8a0a9e52b583f521714b2c15840547ae4a 100644 (file)
--- a/Lib/email/_compat22.py
+++ b/Lib/email/_compat22.py
@@ -38,10 +38,13 @@ def _isstring(obj):
  \f
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
-def body_line_iterator(msg):
-    """Iterate over the parts, returning string payloads line-by-line."""
+def body_line_iterator(msg, decode=False):
+    """Iterate over the parts, returning string payloads line-by-line.
+
+    Optional decode (default False) is passed through to .get_payload().
+    """
      for subpart in msg.walk():
-        payload = subpart.get_payload()
+        payload = subpart.get_payload(decode=decode)
          if _isstring(payload):
              for line in StringIO(payload):
                  yield line
diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py

index 56e44e1c2c62d5b0d86e7e015123452463575cab..a247773300a3c7f23efd83f28c61a4bb181f4e31 100644 (file)
--- a/Lib/email/base64MIME.py
+++ b/Lib/email/base64MIME.py
@@ -102,9 +102,6 @@ def header_encode(header, charset='iso-8859-1', keep_eols=False,
      max_encoded = maxlinelen - len(charset) - MISC_LEN
      max_unencoded = _floordiv(max_encoded * 3, 4)
  
-    # BAW: Ben's original code used a step of max_unencoded, but I think it
-    # ought to be max_encoded.  Otherwise, where's max_encoded used?  I'm
-    # still not sure what the
      for i in range(0, len(header), max_unencoded):
          base64ed.append(b2a_base64(header[i:i+max_unencoded]))
  
diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py

index 18ddd89a1db21869439dbabc473953b91770e289..67369b521fc768fb9c11a683b1a6147786cac7ad 100644 (file)
--- a/Lib/email/quopriMIME.py
+++ b/Lib/email/quopriMIME.py
@@ -82,7 +82,7 @@ def body_quopri_len(str):
  def _max_append(L, s, maxlen, extra=''):
      if not L:
          L.append(s.lstrip())
-    elif len(L[-1]) + len(s) < maxlen:
+    elif len(L[-1]) + len(s) <= maxlen:
          L[-1] += extra + s
      else:
          L.append(s.lstrip())
@@ -116,7 +116,8 @@ def header_encode(header, charset="iso-8859-1", keep_eols=False,
        =?charset?q?Silly_=C8nglish_Kn=EEghts?="
  
      with each line wrapped safely at, at most, maxlinelen characters (defaults
-    to 76 characters).
+    to 76 characters).  If maxlinelen is None, the entire string is encoded in
+    one chunk with no splitting.
  
      End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
      to the canonical email line separator \\r\\n unless the keep_eols
@@ -134,9 +135,13 @@ def header_encode(header, charset="iso-8859-1", keep_eols=False,
          header = fix_eols(header)
  
      # Quopri encode each line, in encoded chunks no greater than maxlinelen in
-    # lenght, after the RFC chrome is added in.
+    # length, after the RFC chrome is added in.
      quoted = []
-    max_encoded = maxlinelen - len(charset) - MISC_LEN
+    if maxlinelen is None:
+        # An obnoxiously large number that's good enough
+        max_encoded = 100000
+    else:
+        max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
  
      for c in header:
          # Space may be represented as _ instead of =20 for readability
diff --git a/Lib/email/test/data/msg_21.txt b/Lib/email/test/data/msg_21.txt

index 5b2e777583d09775e2b47fc3032f55ef687774eb..23590b255dd777e10add4f3bd7a0bc95d656b8b3 100644 (file)
--- a/Lib/email/test/data/msg_21.txt
+++ b/Lib/email/test/data/msg_21.txt
@@ -10,13 +10,11 @@ MIME-Version: 1.0
  Content-Transfer-Encoding: 7bit
  
  One
-
  --BOUNDARY
  Content-Type: text/plain; charset="us-ascii"
  MIME-Version: 1.0
  Content-Transfer-Encoding: 7bit
  
  Two
-
  --BOUNDARY--
  End of MIME message
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index 5fca8a2336a9de6520bb417ee67ece593122fee5..280b400161caa3c01992f83bbcd65221c3d919f7 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1,15 +1,15 @@
-# Copyright (C) 2001,2002 Python Software Foundation
+# Copyright (C) 2001,2002,2003 Python Software Foundation
  # email package unit tests
  
-import sys
  import os
+import sys
  import time
-import unittest
  import base64
  import difflib
+import unittest
+import warnings
  from cStringIO import StringIO
  from types import StringType, ListType
-import warnings
  
  import email
  
@@ -42,11 +42,17 @@ SPACE = ' '
  # We don't care about DeprecationWarnings
  warnings.filterwarnings('ignore', '', DeprecationWarning, __name__)
  
+try:
+    True, False
+except NameError:
+    True = 1
+    False = 0
+
  
  \f
-def openfile(filename):
+def openfile(filename, mode='r'):
      path = os.path.join(os.path.dirname(landmark), 'data', filename)
-    return open(path, 'r')
+    return open(path, mode)
  
  
  \f
@@ -67,10 +73,10 @@ class TestEmailBase(unittest.TestCase):
          # Python 2.1
          ndiffAssertEqual = unittest.TestCase.assertEqual
  
-    def _msgobj(self, filename):
+    def _msgobj(self, filename, strict=False):
          fp = openfile(findfile(filename))
          try:
-            msg = email.message_from_file(fp)
+            msg = email.message_from_file(fp, strict=strict)
          finally:
              fp.close()
          return msg
@@ -184,20 +190,31 @@ class TestMessageAPI(TestEmailBase):
          eq = self.assertEqual
          msg = self._msgobj('msg_10.txt')
          # The outer message is a multipart
-        eq(msg.get_payload(decode=1), None)
+        eq(msg.get_payload(decode=True), None)
          # Subpart 1 is 7bit encoded
-        eq(msg.get_payload(0).get_payload(decode=1),
+        eq(msg.get_payload(0).get_payload(decode=True),
             'This is a 7bit encoded message.\n')
          # Subpart 2 is quopri
-        eq(msg.get_payload(1).get_payload(decode=1),
+        eq(msg.get_payload(1).get_payload(decode=True),
             '\xa1This is a Quoted Printable encoded message!\n')
          # Subpart 3 is base64
-        eq(msg.get_payload(2).get_payload(decode=1),
+        eq(msg.get_payload(2).get_payload(decode=True),
             'This is a Base64 encoded message.')
          # Subpart 4 has no Content-Transfer-Encoding: header.
-        eq(msg.get_payload(3).get_payload(decode=1),
+        eq(msg.get_payload(3).get_payload(decode=True),
             'This has no Content-Transfer-Encoding: header.\n')
  
+    def test_get_decoded_uu_payload(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
+        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+            msg['content-transfer-encoding'] = cte
+            eq(msg.get_payload(decode=True), 'hello world')
+        # Now try some bogus data
+        msg.set_payload('foo')
+        eq(msg.get_payload(decode=True), 'foo')
+
      def test_decoded_generator(self):
          eq = self.assertEqual
          msg = self._msgobj('msg_07.txt')
@@ -310,11 +327,11 @@ class TestMessageAPI(TestEmailBase):
          eq(msg.get_param('charset'), 'iso-2022-jp')
          msg.set_param('importance', 'high value')
          eq(msg.get_param('importance'), 'high value')
-        eq(msg.get_param('importance', unquote=0), '"high value"')
+        eq(msg.get_param('importance', unquote=False), '"high value"')
          eq(msg.get_params(), [('text/plain', ''),
                                ('charset', 'iso-2022-jp'),
                                ('importance', 'high value')])
-        eq(msg.get_params(unquote=0), [('text/plain', ''),
+        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                         ('charset', '"iso-2022-jp"'),
                                         ('importance', '"high value"')])
          msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
@@ -452,6 +469,14 @@ class TestMessageAPI(TestEmailBase):
          eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
          self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
  
+    def test_broken_base64_payload(self):
+        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
+        msg = Message()
+        msg['content-type'] = 'audio/x-midi'
+        msg['content-transfer-encoding'] = 'base64'
+        msg.set_payload(x)
+        self.assertEqual(msg.get_payload(decode=True), x)
+
  
  \f
  # Test the email.Encoders module
@@ -459,21 +484,21 @@ class TestEncoders(unittest.TestCase):
      def test_encode_noop(self):
          eq = self.assertEqual
          msg = MIMEText('hello world', _encoder=Encoders.encode_noop)
-        eq(msg.get_payload(), 'hello world\n')
+        eq(msg.get_payload(), 'hello world')
  
      def test_encode_7bit(self):
          eq = self.assertEqual
          msg = MIMEText('hello world', _encoder=Encoders.encode_7or8bit)
-        eq(msg.get_payload(), 'hello world\n')
+        eq(msg.get_payload(), 'hello world')
          eq(msg['content-transfer-encoding'], '7bit')
          msg = MIMEText('hello \x7f world', _encoder=Encoders.encode_7or8bit)
-        eq(msg.get_payload(), 'hello \x7f world\n')
+        eq(msg.get_payload(), 'hello \x7f world')
          eq(msg['content-transfer-encoding'], '7bit')
  
      def test_encode_8bit(self):
          eq = self.assertEqual
          msg = MIMEText('hello \x80 world', _encoder=Encoders.encode_7or8bit)
-        eq(msg.get_payload(), 'hello \x80 world\n')
+        eq(msg.get_payload(), 'hello \x80 world')
          eq(msg['content-transfer-encoding'], '8bit')
  
      def test_encode_empty_payload(self):
@@ -485,13 +510,13 @@ class TestEncoders(unittest.TestCase):
      def test_encode_base64(self):
          eq = self.assertEqual
          msg = MIMEText('hello world', _encoder=Encoders.encode_base64)
-        eq(msg.get_payload(), 'aGVsbG8gd29ybGQK\n')
+        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=')
          eq(msg['content-transfer-encoding'], 'base64')
  
      def test_encode_quoted_printable(self):
          eq = self.assertEqual
          msg = MIMEText('hello world', _encoder=Encoders.encode_quopri)
-        eq(msg.get_payload(), 'hello=20world\n')
+        eq(msg.get_payload(), 'hello=20world')
          eq(msg['content-transfer-encoding'], 'quoted-printable')
  
      def test_default_cte(self):
@@ -560,7 +585,7 @@ bug demonstration
          g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
          cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
          utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
-        h = Header(g_head, g)
+        h = Header(g_head, g, header_name='Subject')
          h.append(cz_head, cz)
          h.append(utf8_head, utf8)
          msg = Message()
@@ -568,40 +593,32 @@ bug demonstration
          sfp = StringIO()
          g = Generator(sfp)
          g.flatten(msg)
-        eq(sfp.getvalue(), '''\
-Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=
+        eq(sfp.getvalue(), """\
+Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
+ =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
+ =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
+ =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
+ =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
+ =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
+ =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
+ =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
+ =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
+ =?utf-8?b?44Gm44GE44G+44GZ44CC?=
  
-''')
-        eq(h.encode(), '''\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=''')
+""")
+        eq(h.encode(), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
+ =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
+ =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
+ =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
+ =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
+ =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
+ =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
+ =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
+ =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
+ =?utf-8?b?44Gm44GE44G+44GZ44CC?=""")
  
      def test_long_header_encode(self):
          eq = self.ndiffAssertEqual
@@ -706,12 +723,13 @@ from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
      def test_long_8bit_header(self):
          eq = self.ndiffAssertEqual
          msg = Message()
-        h = Header('Britische Regierung gibt', 'iso-8859-1')
+        h = Header('Britische Regierung gibt', 'iso-8859-1',
+                    header_name='Subject')
          h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
          msg['Subject'] = h
          eq(msg.as_string(), """\
-Subject: =?iso-8859-1?q?Britische_Regierung_gibt?=
- =?iso-8859-1?q?gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?=
+ =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?=
  
  """)
  
@@ -722,6 +740,121 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt?=
          eq(msg.as_string(), """\
  Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>
  
+""")
+
+    def test_long_to_header(self):
+        eq = self.ndiffAssertEqual
+        to = '"Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,"Someone Test #B" <someone@umich.edu>, "Someone Test #C" <someone@eecs.umich.edu>, "Someone Test #D" <someone@eecs.umich.edu>'
+        msg = Message()
+        msg['To'] = to
+        eq(msg.as_string(0), '''\
+To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>,
+\t"Someone Test #B" <someone@umich.edu>,
+\t"Someone Test #C" <someone@eecs.umich.edu>,
+\t"Someone Test #D" <someone@eecs.umich.edu>
+
+''')
+
+    def test_long_line_after_append(self):
+        eq = self.ndiffAssertEqual
+        s = 'This is an example of string which has almost the limit of header length.'
+        h = Header(s)
+        h.append('Add another line.')
+        eq(h.encode(), """\
+This is an example of string which has almost the limit of header length.
+ Add another line.""")
+
+    def test_shorter_line_with_append(self):
+        eq = self.ndiffAssertEqual
+        s = 'This is a shorter line.'
+        h = Header(s)
+        h.append('Add another sentence. (Surprise?)')
+        eq(h.encode(),
+           'This is a shorter line. Add another sentence. (Surprise?)')
+
+    def test_long_field_name(self):
+        eq = self.ndiffAssertEqual
+        fn = 'X-Very-Very-Very-Long-Header-Name'
+        gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
+        h = Header(gs, 'iso-8859-1', header_name=fn)
+        # BAW: this seems broken because the first line is too long
+        eq(h.encode(), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_?=
+ =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?=
+ =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?=
+ =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
+
+    def test_long_received_header(self):
+        h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700'
+        msg = Message()
+        msg['Received-1'] = Header(h, continuation_ws='\t')
+        msg['Received-2'] = h
+        self.assertEqual(msg.as_string(), """\
+Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+\tWed, 05 Mar 2003 18:10:18 -0700
+Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+\tWed, 05 Mar 2003 18:10:18 -0700
+
+""")
+
+    def test_string_headerinst_eq(self):
+        h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")'
+        msg = Message()
+        msg['Received-1'] = Header(h, header_name='Received-1',
+                                   continuation_ws='\t')
+        msg['Received-2'] = h
+        self.assertEqual(msg.as_string(), """\
+Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
+\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
+Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
+\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
+
+""")
+
+    def test_long_unbreakable_lines_with_continuation(self):
+        eq = self.ndiffAssertEqual
+        msg = Message()
+        t = """\
+ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
+        msg['Face-1'] = t
+        msg['Face-2'] = Header(t, header_name='Face-2')
+        eq(msg.as_string(), """\
+Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+\tlocQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+
+""")
+
+    def test_another_long_multiline_header(self):
+        eq = self.ndiffAssertEqual
+        m = '''\
+Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
+       Wed, 16 Oct 2002 07:41:11 -0700'''
+        msg = email.message_from_string(m)
+        eq(msg.as_string(), '''\
+Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
+       Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
+
+''')
+
+    def test_long_lines_with_different_header(self):
+        eq = self.ndiffAssertEqual
+        h = """\
+List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>"""
+        msg = Message()
+        msg['List'] = h
+        msg['List'] = Header(h, header_name='List')
+        eq(msg.as_string(), """\
+List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+       <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+
  """)
  
  
@@ -738,7 +871,7 @@ Blah blah blah
  
      def test_mangled_from(self):
          s = StringIO()
-        g = Generator(s, mangle_from_=1)
+        g = Generator(s, mangle_from_=True)
          g.flatten(self.msg)
          self.assertEqual(s.getvalue(), """\
  From: aaa@bbb.org
@@ -749,7 +882,7 @@ Blah blah blah
  
      def test_dont_mangle_from(self):
          s = StringIO()
-        g = Generator(s, mangle_from_=0)
+        g = Generator(s, mangle_from_=False)
          g.flatten(self.msg)
          self.assertEqual(s.getvalue(), """\
  From: aaa@bbb.org
@@ -763,8 +896,13 @@ Blah blah blah
  # Test the basic MIMEAudio class
  class TestMIMEAudio(unittest.TestCase):
      def setUp(self):
-        # In Python, audiotest.au lives in Lib/test not Lib/test/data
-        fp = open(findfile('audiotest.au'), 'rb')
+        # Make sure we pick up the audiotest.au that lives in email/test/data.
+        # In Python, there's an audiotest.au living in Lib/test but that isn't
+        # included in some binary distros that don't include the test
+        # package.  The trailing empty string on the .join() is significant
+        # since findfile() will do a dirname().
+        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
+        fp = open(findfile('audiotest.au', datadir), 'rb')
          try:
              self._audiodata = fp.read()
          finally:
@@ -883,7 +1021,7 @@ class TestMIMEText(unittest.TestCase):
                 is missing)
  
      def test_payload(self):
-        self.assertEqual(self._msg.get_payload(), 'hello there\n')
+        self.assertEqual(self._msg.get_payload(), 'hello there')
          self.failUnless(not self._msg.is_multipart())
  
      def test_charset(self):
@@ -895,7 +1033,7 @@ class TestMIMEText(unittest.TestCase):
  
  \f
  # Test a more complicated multipart/mixed type message
-class TestMultipartMixed(unittest.TestCase):
+class TestMultipartMixed(TestEmailBase):
      def setUp(self):
          fp = openfile('PyBanner048.gif')
          try:
@@ -978,6 +1116,7 @@ From: bperson@dom.ain
  ''')
  
      def test_one_part_in_a_multipart(self):
+        eq = self.ndiffAssertEqual
          outer = MIMEBase('multipart', 'mixed')
          outer['Subject'] = 'A subject'
          outer['To'] = 'aperson@dom.ain'
@@ -987,7 +1126,7 @@ From: bperson@dom.ain
          outer.set_boundary('BOUNDARY')
          msg = MIMEText('hello world')
          outer.attach(msg)
-        self.assertEqual(outer.as_string(), '''\
+        eq(outer.as_string(), '''\
  Content-Type: multipart/mixed; boundary="BOUNDARY"
  MIME-Version: 1.0
  Subject: A subject
@@ -1000,11 +1139,11 @@ MIME-Version: 1.0
  Content-Transfer-Encoding: 7bit
  
  hello world
-
  --BOUNDARY--
  ''')
  
      def test_seq_parts_in_a_multipart(self):
+        eq = self.ndiffAssertEqual
          outer = MIMEBase('multipart', 'mixed')
          outer['Subject'] = 'A subject'
          outer['To'] = 'aperson@dom.ain'
@@ -1014,7 +1153,7 @@ hello world
          msg = MIMEText('hello world')
          outer.attach(msg)
          outer.set_boundary('BOUNDARY')
-        self.assertEqual(outer.as_string(), '''\
+        eq(outer.as_string(), '''\
  Content-Type: multipart/mixed; boundary="BOUNDARY"
  MIME-Version: 1.0
  Subject: A subject
@@ -1027,7 +1166,6 @@ MIME-Version: 1.0
  Content-Transfer-Encoding: 7bit
  
  hello world
-
  --BOUNDARY--
  ''')
  
@@ -1048,7 +1186,7 @@ class TestNonConformant(TestEmailBase):
              data = fp.read()
          finally:
              fp.close()
-        p = Parser(strict=1)
+        p = Parser(strict=True)
          # Note, under a future non-strict parsing mode, this would parse the
          # message into the intended message tree.
          self.assertRaises(Errors.BoundaryError, p.parsestr, data)
@@ -1099,6 +1237,20 @@ message 2
  --BOUNDARY--
  """)
  
+    def test_no_separating_blank_line(self):
+        eq = self.ndiffAssertEqual
+        msg = self._msgobj('msg_35.txt')
+        eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: here's something interesting
+
+counter to RFC 2822, there's no separating newline here
+""")
+        # strict=True should raise an exception
+        self.assertRaises(Errors.HeaderParseError,
+                          self._msgobj, 'msg_35.txt', True)
+
  
  \f
  # Test RFC 2047 header encoding and decoding
@@ -1133,6 +1285,31 @@ class TestRFC2047(unittest.TestCase):
          eq(Utils.encode(s2, charset='iso-8859-2', encoding='b'),
             '=?iso-8859-2?b?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=')
  
+    def test_rfc2047_multiline(self):
+        eq = self.assertEqual
+        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
+ foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
+        dh = decode_header(s)
+        eq(dh, [
+            ('Re:', None),
+            ('r\x8aksm\x9arg\x8cs', 'mac-iceland'),
+            ('baz foo bar', None),
+            ('r\x8aksm\x9arg\x8cs', 'mac-iceland')])
+        eq(str(make_header(dh)),
+           """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar
+ =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""")
+
+    def test_whitespace_eater_unicode(self):
+        eq = self.assertEqual
+        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
+        dh = decode_header(s)
+        eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
+        # Python 2.1's unicode() builtin doesn't call the object's
+        # __unicode__() method.  Use the following alternative instead.
+        #hu = unicode(make_header(dh)).encode('latin-1')
+        hu = make_header(dh).__unicode__().encode('latin-1')
+        eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
+
  
  \f
  # Test the MIMEMessage class
@@ -1263,6 +1440,7 @@ Your message cannot be delivered to the following recipients:
             '<002001c144a6$8752e060$56104586@oxy.edu>')
  
      def test_epilogue(self):
+        eq = self.ndiffAssertEqual
          fp = openfile('msg_21.txt')
          try:
              text = fp.read()
@@ -1282,7 +1460,42 @@ Your message cannot be delivered to the following recipients:
          sfp = StringIO()
          g = Generator(sfp)
          g.flatten(msg)
-        self.assertEqual(sfp.getvalue(), text)
+        eq(sfp.getvalue(), text)
+
+    def test_no_nl_preamble(self):
+        eq = self.ndiffAssertEqual
+        msg = Message()
+        msg['From'] = 'aperson@dom.ain'
+        msg['To'] = 'bperson@dom.ain'
+        msg['Subject'] = 'Test'
+        msg.preamble = 'MIME message'
+        msg.epilogue = ''
+        msg1 = MIMEText('One')
+        msg2 = MIMEText('Two')
+        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
+        msg.attach(msg1)
+        msg.attach(msg2)
+        eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: Test
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+MIME message
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+One
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+Two
+--BOUNDARY--
+""")
  
      def test_default_type(self):
          eq = self.assertEqual
@@ -1494,6 +1707,10 @@ class TestIdempotent(TestEmailBase):
          msg, text = self._msgobj('msg_33.txt')
          self._idempotent(msg, text)
  
+    def test_text_plain_in_a_multipart_digest(self):
+        msg, text = self._msgobj('msg_34.txt')
+        self._idempotent(msg, text)
+
      def test_content_type(self):
          eq = self.assertEquals
          unless = self.failUnless
@@ -1640,12 +1857,17 @@ class TestMiscellaneous(unittest.TestCase):
      def test_formatdate_localtime(self):
          now = time.time()
          self.assertEqual(
-            Utils.parsedate(Utils.formatdate(now, localtime=1))[:6],
+            Utils.parsedate(Utils.formatdate(now, localtime=True))[:6],
              time.localtime(now)[:6])
  
      def test_parsedate_none(self):
          self.assertEqual(Utils.parsedate(''), None)
  
+    def test_parsedate_compact(self):
+        # The FWS after the comma is optional
+        self.assertEqual(Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
+                         Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
+
      def test_parseaddr_empty(self):
          self.assertEqual(Utils.parseaddr('<>'), ('', ''))
          self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
@@ -1663,6 +1885,23 @@ class TestMiscellaneous(unittest.TestCase):
          b = 'person@dom.ain'
          self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
  
+    def test_escape_backslashes(self):
+        self.assertEqual(
+            Utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
+            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
+        a = r'Arthur \Backslash\ Foobar'
+        b = 'person@dom.ain'
+        self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+
+    def test_name_with_dot(self):
+        x = 'John X. Doe <jxd@example.com>'
+        y = '"John X. Doe" <jxd@example.com>'
+        a, b = ('John X. Doe', 'jxd@example.com')
+        self.assertEqual(Utils.parseaddr(x), (a, b))
+        self.assertEqual(Utils.parseaddr(y), (a, b))
+        # formataddr() quotes the name if there's a dot in it
+        self.assertEqual(Utils.formataddr((a, b)), y)
+
      def test_quote_dump(self):
          self.assertEqual(
              Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
@@ -1703,6 +1942,16 @@ class TestMiscellaneous(unittest.TestCase):
             [('Al Person', 'aperson@dom.ain'),
              ('Bud Person', 'bperson@dom.ain')])
  
+    def test_getaddresses_nasty(self):
+        eq = self.assertEqual
+        eq(Utils.getaddresses(['foo: ;']), [('', '')])
+        eq(Utils.getaddresses(
+           ['[]*-- =~$']),
+           [('', ''), ('', ''), ('', '*--')])
+        eq(Utils.getaddresses(
+           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
+           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+
      def test_utils_quote_unquote(self):
          eq = self.assertEqual
          msg = Message()
@@ -1839,11 +2088,8 @@ Here's the message body
          eq(msg.get_payload(), "Here's the message body\n")
  
      def test_crlf_separation(self):
-        if sys.platform == 'mac':
-            # Skipped in MacPython 2.2.X due to line-end problems
-            return
          eq = self.assertEqual
-        fp = openfile('msg_26.txt')
+        fp = openfile('msg_26.txt', mode='rb')
          try:
              msg = Parser().parse(fp)
          finally:
@@ -1950,7 +2196,7 @@ eHh4eCB4eHh4IA==\r
          # Test the charset option
          eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
          # Test the keep_eols flag
-        eq(he('hello\nworld', keep_eols=1),
+        eq(he('hello\nworld', keep_eols=True),
             '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
          # Test the maxlinelen argument
          eq(he('xxxx ' * 20, maxlinelen=40), """\
@@ -2029,7 +2275,7 @@ class TestQuopri(unittest.TestCase):
          # Test the charset option
          eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
          # Test the keep_eols flag
-        eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=')
+        eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=')
          # Test a non-ASCII character
          eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
          # Test the maxlinelen argument
@@ -2083,6 +2329,13 @@ two line""")
  \f
  # Test the Charset class
  class TestCharset(unittest.TestCase):
+    def tearDown(self):
+        from email import Charset as CharsetModule
+        try:
+            del CharsetModule.CHARSETS['fake']
+        except KeyError:
+            pass
+
      def test_idempotent(self):
          eq = self.assertEqual
          # Make sure us-ascii = no Unicode conversion
@@ -2095,6 +2348,36 @@ class TestCharset(unittest.TestCase):
          sp = c.to_splittable(s)
          eq(s, c.from_splittable(sp))
  
+    def test_body_encode(self):
+        eq = self.assertEqual
+        # Try a charset with QP body encoding
+        c = Charset('iso-8859-1')
+        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
+        # Try a charset with Base64 body encoding
+        c = Charset('utf-8')
+        eq('aGVsbG8gd29ybGQ=\n', c.body_encode('hello world'))
+        # Try a charset with None body encoding
+        c = Charset('us-ascii')
+        eq('hello world', c.body_encode('hello world'))
+        # Try the convert argument, where input codec <> output codec
+        c = Charset('euc-jp')
+        # With apologies to Tokio Kikuchi ;)
+        try:
+            eq('\x1b$B5FCO;~IW\x1b(B',
+               c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
+            eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
+               c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
+        except LookupError:
+            # We probably don't have the Japanese codecs installed
+            pass
+        # Testing SF bug #625509, which we have to fake, since there are no
+        # built-in encodings where the header encoding is QP but the body
+        # encoding is not.
+        from email import Charset as CharsetModule
+        CharsetModule.add_charset('fake', CharsetModule.QP, None)
+        c = Charset('fake')
+        eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
+
  
  \f
  # Test multilingual MIME headers.
@@ -2104,14 +2387,14 @@ class TestHeader(TestEmailBase):
          h = Header('Hello World!')
          eq(h.encode(), 'Hello World!')
          h.append(' Goodbye World!')
-        eq(h.encode(), 'Hello World! Goodbye World!')
+        eq(h.encode(), 'Hello World!  Goodbye World!')
  
      def test_simple_surprise(self):
          eq = self.ndiffAssertEqual
          h = Header('Hello World!')
          eq(h.encode(), 'Hello World!')
          h.append('Goodbye World!')
-        eq(h.encode(), 'Hello World!Goodbye World!')
+        eq(h.encode(), 'Hello World! Goodbye World!')
  
      def test_header_needs_no_decoding(self):
          h = 'no decoding needed'
@@ -2120,7 +2403,7 @@ class TestHeader(TestEmailBase):
      def test_long(self):
          h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                     maxlinelen=76)
-        for l in h.encode().split('\n '):
+        for l in h.encode(splitchars=' ').split('\n '):
              self.failUnless(len(l) <= 76)
  
      def test_multilingual(self):
@@ -2135,21 +2418,18 @@ class TestHeader(TestEmailBase):
          h.append(cz_head, cz)
          h.append(utf8_head, utf8)
          enc = h.encode()
-        eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
+        eq(enc, """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?=
+ =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?=
+ =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?=
+ =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
+ =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
+ =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
+ =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
+ =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?=
+ =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?=
+ =?utf-8?b?44CC?=""")
          eq(decode_header(enc),
             [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
              (utf8_head, "utf-8")])
@@ -2230,6 +2510,41 @@ A very long line that must get split to something other than at the
          h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
          eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
  
+    def test_bad_8bit_header(self):
+        raises = self.assertRaises
+        eq = self.assertEqual
+        x = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+        raises(UnicodeError, Header, x)
+        h = Header()
+        raises(UnicodeError, h.append, x)
+        eq(str(Header(x, errors='replace')), x)
+        h.append(x, errors='replace')
+        eq(str(h), x)
+
+    def test_encoded_adjacent_nonencoded(self):
+        eq = self.assertEqual
+        h = Header()
+        h.append('hello', 'iso-8859-1')
+        h.append('world')
+        s = h.encode()
+        eq(s, '=?iso-8859-1?q?hello?= world')
+        h = make_header(decode_header(s))
+        eq(h.encode(), s)
+
+    def test_whitespace_eater(self):
+        eq = self.assertEqual
+        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
+        parts = decode_header(s)
+        eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
+        hdr = make_header(parts)
+        eq(hdr.encode(),
+           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
+
+    def test_broken_base64_header(self):
+        raises = self.assertRaises
+        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
+        raises(Errors.HeaderParseError, decode_header, s)
+
  
  \f
  # Test RFC 2231 header parameters (en/de)coding
@@ -2239,7 +2554,7 @@ class TestRFC2231(TestEmailBase):
          msg = self._msgobj('msg_29.txt')
          eq(msg.get_param('title'),
             ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
-        eq(msg.get_param('title', unquote=0),
+        eq(msg.get_param('title', unquote=False),
             ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
  
      def test_set_param(self):
@@ -2314,6 +2629,17 @@ Do you like this message?
          msg = self._msgobj('msg_32.txt')
          eq(msg.get_content_charset(), 'us-ascii')
  
+    def test_rfc2231_no_language_or_charset(self):
+        m = '''\
+Content-Transfer-Encoding: 8bit
+Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
+Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_param('NAME'),
+                         (None, None, 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm'))
+
  
  \f
  def _testclasses():
diff --git a/Lib/email/test/test_email_codecs.py b/Lib/email/test/test_email_codecs.py

index a9a500e340bae2b018ce28a863777486ceff86b0..cd8486a8b85f36f43d87821cc38f9e505e10e7c7 100644 (file)
--- a/Lib/email/test/test_email_codecs.py
+++ b/Lib/email/test/test_email_codecs.py
@@ -28,7 +28,14 @@ class TestEmailAsianCodecs(TestEmailBase):
          ghello = 'Gr\xfc\xdf Gott!'
          h.append(jhello, j)
          h.append(ghello, g)
-        eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
+        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
+        # chunks into a single encoded word.  However it doesn't violate the
+        # standard to have them as two encoded chunks and maybe it's
+        # reasonable <wink> for each .append() call to result in a separate
+        # encoded word.
+        eq(h.encode(), """\
+Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
+ =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
          eq(decode_header(h.encode()),
             [('Hello World!', None),
              ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
@@ -37,23 +44,12 @@ class TestEmailAsianCodecs(TestEmailBase):
          h = Header(long, j, header_name="Subject")
          # test a very long header
          enc = h.encode()
-        # BAW: The following used to pass.  Sadly, the test afterwards is what
-        # happens now.  I've no idea which is right.  Please, any Japanese and
-        # RFC 2047 experts, please verify!
-##        eq(enc, '''\
-##=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
-## =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=
-## =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=''')
-        eq(enc, """\
-=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
- =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NUcnJHJCVCRDJEYkJCReJDkbKEI=?=""")
-        # BAW: same deal here. :(
-##        self.assertEqual(
-##            decode_header(enc),
-##            [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
-        self.assertEqual(
-            decode_header(enc),
-            [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5G'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
+        # TK: splitting point may differ by codec design and/or Header encoding
+        eq(enc , """\
+=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
+ =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
+        # TK: full decode comparison
+        eq(h.__unicode__().encode('euc-jp'), long)
  
  
  \f
author	Barry Warsaw <barry@python.org>
	Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)
committer	Barry Warsaw <barry@python.org>
	Fri, 21 Mar 2003 21:09:32 +0000 (21:09 +0000)
Lib/email/Charset.py		patch \| blob \| blame \| history
Lib/email/Generator.py		patch \| blob \| blame \| history
Lib/email/Header.py		patch \| blob \| blame \| history
Lib/email/MIMEText.py		patch \| blob \| blame \| history
Lib/email/Message.py		patch \| blob \| blame \| history
Lib/email/Parser.py		patch \| blob \| blame \| history
Lib/email/Utils.py		patch \| blob \| blame \| history
Lib/email/__init__.py		patch \| blob \| blame \| history
Lib/email/_compat21.py		patch \| blob \| blame \| history
Lib/email/_compat22.py		patch \| blob \| blame \| history
Lib/email/base64MIME.py		patch \| blob \| blame \| history
Lib/email/quopriMIME.py		patch \| blob \| blame \| history
Lib/email/test/data/msg_21.txt		patch \| blob \| blame \| history
Lib/email/test/test_email.py		patch \| blob \| blame \| history
Lib/email/test/test_email_codecs.py		patch \| blob \| blame \| history