# input header enc body enc output conv
'iso-8859-1': (QP, QP, None),
'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
'us-ascii': (None, None, None),
'big5': (BASE64, BASE64, None),
'gb2312': (BASE64, BASE64, None),
ALIASES = {
'latin_1': 'iso-8859-1',
'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
'ascii': 'us-ascii',
}
'euc-jp': 'japanese.euc-jp',
'iso-2022-jp': 'japanese.iso-2022-jp',
'shift_jis': 'japanese.shift_jis',
+ 'euc-kr': 'korean.euc-kr',
+ 'ks_c_5601-1987': 'korean.cp949',
+ 'iso-2022-kr': 'korean.iso-2022-kr',
+ 'johab': 'korean.johab',
'gb2132': 'eucgb2312_cn',
'big5': 'big5_tw',
'utf-8': 'utf-8',
def __str__(self):
return self.input_charset.lower()
+ __repr__ = __str__
+
def __eq__(self, other):
return str(self) == str(other).lower()
if self.header_encoding == BASE64:
return email.base64MIME.header_encode(s, cset)
elif self.header_encoding == QP:
- return email.quopriMIME.header_encode(s, cset)
+ return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
elif self.header_encoding == SHORTEST:
lenb64 = email.base64MIME.base64_len(s)
lenqp = email.quopriMIME.header_quopri_len(s)
if lenb64 < lenqp:
return email.base64MIME.header_encode(s, cset)
else:
- return email.quopriMIME.header_encode(s, cset)
+ return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
else:
return s
# 7bit/8bit encodings return the string unchanged (module conversions)
if self.body_encoding is BASE64:
return email.base64MIME.body_encode(s)
- elif self.header_encoding is QP:
+ elif self.body_encoding is QP:
return email.quopriMIME.body_encode(s)
else:
return s
"""Classes to generate plain text from a message object tree.
"""
-import time
import re
+import time
+import locale
import random
from types import ListType, StringType
from cStringIO import StringIO
from email.Header import Header
+from email.Parser import NLCRE
try:
from email._compat22 import _isstring
def _write_headers(self, msg):
for h, v in msg.items():
- # RFC 2822 says that lines SHOULD be no more than maxheaderlen
- # characters wide, so we're well within our rights to split long
- # headers.
- text = '%s: %s' % (h, v)
- if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
- text = self._split_header(text)
- print >> self._fp, text
+ print >> self._fp, '%s:' % h,
+ if self.__maxheaderlen == 0:
+ # Explicit no-wrapping
+ print >> self._fp, v
+ elif isinstance(v, Header):
+ # Header instances know what to do
+ print >> self._fp, v.encode()
+ elif _is8bitstring(v):
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ print >> self._fp, v
+ else:
+ # Header's got lots of smarts, so use it.
+ print >> self._fp, Header(
+ v, maxlinelen=self.__maxheaderlen,
+ header_name=h, continuation_ws='\t').encode()
# A blank line always separates headers from body
print >> self._fp
- def _split_header(self, text):
- maxheaderlen = self.__maxheaderlen
- # Find out whether any lines in the header are really longer than
- # maxheaderlen characters wide. There could be continuation lines
- # that actually shorten it. Also, replace hard tabs with 8 spaces.
- lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
- for line in lines:
- if len(line) > maxheaderlen:
- break
- else:
- # No line was actually longer than maxheaderlen characters, so
- # just return the original unchanged.
- return text
- # If we have raw 8bit data in a byte string, we have no idea what the
- # encoding is. I think there is no safe way to split this string. If
- # it's ascii-subset, then we could do a normal ascii split, but if
- # it's multibyte then we could break the string. There's no way to
- # know so the least harm seems to be to not split the string and risk
- # it being too long.
- if _is8bitstring(text):
- return text
- # The `text' argument already has the field name prepended, so don't
- # provide it here or the first line will get folded too short.
- h = Header(text, maxlinelen=maxheaderlen,
- # For backwards compatibility, we use a hard tab here
- continuation_ws='\t')
- return h.encode()
-
#
# Handlers for writing types and subtypes
#
# Write out any preamble
if msg.preamble is not None:
self._fp.write(msg.preamble)
+ # If preamble is the empty string, the length of the split will be
+ # 1, but the last element will be the empty string. If it's
+ # anything else but does not end in a line separator, the length
+ # will be > 1 and not end in an empty string. We need to
+ # guarantee a newline after the preamble, but don't add too many.
+ plines = NLCRE.split(msg.preamble)
+ if plines <> [''] and plines[-1] <> '':
+ self._fp.write('\n')
# First boundary is a bit different; it doesn't have a leading extra
# newline.
print >> self._fp, '--' + boundary
def _make_boundary(text=None):
# Craft a random boundary. If text is given, ensure that the chosen
# boundary doesn't appear in the text.
- boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
+ dp = locale.localeconv().get('decimal_point', '.')
+ boundary = ('=' * 15) + repr(random.random()).split(dp)[1] + '=='
if text is None:
return boundary
b = boundary
"""Header encoding and decoding functionality."""
import re
+import binascii
from types import StringType, UnicodeType
import email.quopriMIME
import email.base64MIME
+from email.Errors import HeaderParseError
from email.Charset import Charset
try:
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NL = '\n'
+SPACE = ' '
+USPACE = u' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
+UEMPTYSTRING = u''
MAXLINELEN = 76
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE)
+pcre = re.compile('([,;])')
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822. Character range is from exclamation mark to tilde.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
\f
# Helpers
decoded parts of the header. Charset is None for non-encoded parts of the
header, otherwise a lower-case string containing the name of the character
set specified in the encoded string.
+
+ An email.Errors.HeaderParseError may be raised when certain decoding errors
+ occur (e.g. a base64 decoding exception).
"""
# If no encoding, just return the header
header = str(header)
if unenc:
# Should we continue a long line?
if decoded and decoded[-1][1] is None:
- decoded[-1] = (decoded[-1][0] + dec, None)
+ decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
else:
decoded.append((unenc, None))
if parts:
charset, encoding = [s.lower() for s in parts[0:2]]
encoded = parts[2]
- dec = ''
+ dec = None
if encoding == 'q':
dec = email.quopriMIME.header_decode(encoded)
elif encoding == 'b':
- dec = email.base64MIME.decode(encoded)
- else:
+ try:
+ dec = email.base64MIME.decode(encoded)
+ except binascii.Error:
+ # Turn this into a higher level exception. BAW: Right
+ # now we throw the lower level exception away but
+ # when/if we get exception chaining, we'll preserve it.
+ raise HeaderParseError
+ if dec is None:
dec = encoded
if decoded and decoded[-1][1] == charset:
\f
class Header:
- def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
- continuation_ws=' '):
+ def __init__(self, s=None, charset=None,
+ maxlinelen=None, header_name=None,
+ continuation_ws=' ', errors='strict'):
"""Create a MIME-compliant header that can contain many character sets.
Optional s is the initial header value. If None, the initial header
continuation_ws must be RFC 2822 compliant folding whitespace (usually
either a space or a hard tab) which will be prepended to continuation
lines.
+
+ errors is passed through to the .append() call.
"""
if charset is None:
charset = USASCII
# BAW: I believe `chunks' and `maxlinelen' should be non-public.
self._chunks = []
if s is not None:
- self.append(s, charset)
+ self.append(s, charset, errors)
if maxlinelen is None:
maxlinelen = MAXLINELEN
if header_name is None:
def __unicode__(self):
"""Helper for the built-in unicode function."""
- # charset item is a Charset instance so we need to stringify it.
- uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
- return u''.join(uchunks)
+ uchunks = []
+ lastcs = None
+ for s, charset in self._chunks:
+ # We must preserve spaces between encoded and non-encoded word
+ # boundaries, which means for us we need to add a space when we go
+ # from a charset to None/us-ascii, or from None/us-ascii to a
+ # charset. Only do this for the second and subsequent chunks.
+ nextcs = charset
+ if uchunks:
+ if lastcs is not None:
+ if nextcs is None or nextcs == 'us-ascii':
+ uchunks.append(USPACE)
+ nextcs = None
+ elif nextcs is not None and nextcs <> 'us-ascii':
+ uchunks.append(USPACE)
+ lastcs = nextcs
+ uchunks.append(unicode(s, str(charset)))
+ return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators?
def __ne__(self, other):
return not self == other
- def append(self, s, charset=None):
+ def append(self, s, charset=None, errors='strict'):
"""Append a string to the MIME header.
Optional charset, if given, should be a Charset instance or the name
using RFC 2047 rules, the Unicode string will be encoded using the
following charsets in order: us-ascii, the charset hint, utf-8. The
first character set not to provoke a UnicodeError is used.
+
+ Optional `errors' is passed as the third argument to any unicode() or
+ ustr.encode() call.
"""
if charset is None:
charset = self._charset
# Possibly raise UnicodeError if the byte string can't be
# converted to a unicode with the input codec of the charset.
incodec = charset.input_codec or 'us-ascii'
- ustr = unicode(s, incodec)
+ ustr = unicode(s, incodec, errors)
# Now make sure that the unicode could be converted back to a
# byte string with the output codec, which may be different
# than the input codec. Still, use the original byte string.
outcodec = charset.output_codec or 'us-ascii'
- ustr.encode(outcodec)
+ ustr.encode(outcodec, errors)
elif isinstance(s, UnicodeType):
# Now we have to be sure the unicode string can be converted
# to a byte string with a reasonable output codec. We want to
for charset in USASCII, charset, UTF8:
try:
outcodec = charset.output_codec or 'us-ascii'
- s = s.encode(outcodec)
+ s = s.encode(outcodec, errors)
break
except UnicodeError:
pass
assert False, 'utf-8 conversion failed'
self._chunks.append((s, charset))
- def _split(self, s, charset, firstline=False):
+ def _split(self, s, charset, maxlinelen, splitchars):
# Split up a header safely for use with encode_chunks.
splittable = charset.to_splittable(s)
- encoded = charset.from_splittable(splittable)
+ encoded = charset.from_splittable(splittable, True)
elen = charset.encoded_header_len(encoded)
-
- if elen <= self._maxlinelen:
+ # If the line's encoded length fits, just return it
+ if elen <= maxlinelen:
return [(encoded, charset)]
# If we have undetermined raw 8bit characters sitting in a byte
# string, we really don't know what the right thing to do is. We
# could break if we split it between pairs. The least harm seems to
# be to not split the header at all, but that means they could go out
# longer than maxlinelen.
- elif charset == '8bit':
+ if charset == '8bit':
return [(s, charset)]
# BAW: I'm not sure what the right test here is. What we're trying to
# do is be faithful to RFC 2822's recommendation that ($2.2.3):
# For now, I can only imagine doing this when the charset is us-ascii,
# although it's possible that other charsets may also benefit from the
# higher-level syntactic breaks.
- #
elif charset == 'us-ascii':
- return self._ascii_split(s, charset, firstline)
+ return self._split_ascii(s, charset, maxlinelen, splitchars)
# BAW: should we use encoded?
elif elen == len(s):
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
- splitpnt = self._maxlinelen
+ splitpnt = maxlinelen
first = charset.from_splittable(splittable[:splitpnt], False)
last = charset.from_splittable(splittable[splitpnt:], False)
else:
- # Divide and conquer.
- halfway = _floordiv(len(splittable), 2)
- first = charset.from_splittable(splittable[:halfway], False)
- last = charset.from_splittable(splittable[halfway:], False)
- # Do the split
- return self._split(first, charset, firstline) + \
- self._split(last, charset)
-
- def _ascii_split(self, s, charset, firstline):
- # Attempt to split the line at the highest-level syntactic break
- # possible. Note that we don't have a lot of smarts about field
- # syntax; we just try to break on semi-colons, then whitespace.
- rtn = []
- lines = s.splitlines()
- while lines:
- line = lines.pop(0)
- if firstline:
- maxlinelen = self._firstlinelen
- firstline = False
- else:
- #line = line.lstrip()
- maxlinelen = self._maxlinelen
- # Short lines can remain unchanged
- if len(line.replace('\t', SPACE8)) <= maxlinelen:
- rtn.append(line)
- else:
- oldlen = len(line)
- # Try to break the line on semicolons, but if that doesn't
- # work, try to split on folding whitespace.
- while len(line) > maxlinelen:
- i = line.rfind(';', 0, maxlinelen)
- if i < 0:
- break
- rtn.append(line[:i] + ';')
- line = line[i+1:]
- # Is the remaining stuff still longer than maxlinelen?
- if len(line) <= maxlinelen:
- # Splitting on semis worked
- rtn.append(line)
- continue
- # Splitting on semis didn't finish the job. If it did any
- # work at all, stick the remaining junk on the front of the
- # `lines' sequence and let the next pass do its thing.
- if len(line) <> oldlen:
- lines.insert(0, line)
- continue
- # Otherwise, splitting on semis didn't help at all.
- parts = re.split(r'(\s+)', line)
- if len(parts) == 1 or (len(parts) == 3 and
- parts[0].endswith(':')):
- # This line can't be split on whitespace. There's now
- # little we can do to get this into maxlinelen. BAW:
- # We're still potentially breaking the RFC by possibly
- # allowing lines longer than the absolute maximum of 998
- # characters. For now, let it slide.
- #
- # len(parts) will be 1 if this line has no `Field: '
- # prefix, otherwise it will be len(3).
- rtn.append(line)
- continue
- # There is whitespace we can split on.
- first = parts.pop(0)
- sublines = [first]
- acc = len(first)
- while parts:
- len0 = len(parts[0])
- len1 = len(parts[1])
- if acc + len0 + len1 <= maxlinelen:
- sublines.append(parts.pop(0))
- sublines.append(parts.pop(0))
- acc += len0 + len1
- else:
- # Split it here, but don't forget to ignore the
- # next whitespace-only part
- if first <> '':
- rtn.append(EMPTYSTRING.join(sublines))
- del parts[0]
- first = parts.pop(0)
- sublines = [first]
- acc = len(first)
- rtn.append(EMPTYSTRING.join(sublines))
- return [(chunk, charset) for chunk in rtn]
-
- def _encode_chunks(self, newchunks):
+ # Binary search for split point
+ first, last = _binsplit(splittable, charset, maxlinelen)
+ # first is of the proper length so just wrap it in the appropriate
+ # chrome. last must be recursively split.
+ fsplittable = charset.to_splittable(first)
+ fencoded = charset.from_splittable(fsplittable, True)
+ chunk = [(fencoded, charset)]
+ return chunk + self._split(last, charset, self._maxlinelen, splitchars)
+
+ def _split_ascii(self, s, charset, firstlen, splitchars):
+ chunks = _split_ascii(s, firstlen, self._maxlinelen,
+ self._continuation_ws, splitchars)
+ return zip(chunks, [charset]*len(chunks))
+
+ def _encode_chunks(self, newchunks, maxlinelen):
# MIME-encode a header with many different charsets and/or encodings.
#
# Given a list of pairs (string, charset), return a MIME-encoded
#
# =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
# =?charset2?b?SvxyZ2VuIEL2aW5n?="
- #
chunks = []
for header, charset in newchunks:
+ if not header:
+ continue
if charset is None or charset.header_encoding is None:
- # There's no encoding for this chunk's charsets
- _max_append(chunks, header, self._maxlinelen)
+ s = header
+ else:
+ s = charset.header_encode(header)
+ # Don't add more folding whitespace than necessary
+ if chunks and chunks[-1].endswith(' '):
+ extra = ''
else:
- _max_append(chunks, charset.header_encode(header),
- self._maxlinelen, ' ')
+ extra = ' '
+ _max_append(chunks, s, maxlinelen, extra)
joiner = NL + self._continuation_ws
return joiner.join(chunks)
- def encode(self):
+ def encode(self, splitchars=';, '):
"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
+
+ Optional splitchars is a string containing characters to split long
+ ASCII lines on, in rough support of RFC 2822's `highest level
+ syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
"""
newchunks = []
+ maxlinelen = self._firstlinelen
+ lastlen = 0
for s, charset in self._chunks:
- newchunks += self._split(s, charset, True)
- return self._encode_chunks(newchunks)
+ # The first bit of the next chunk should be just long enough to
+ # fill the next line. Don't forget the space separating the
+ # encoded words.
+ targetlen = maxlinelen - lastlen - 1
+ if targetlen < charset.encoded_header_len(''):
+ # Stick it on the next line
+ targetlen = maxlinelen
+ newchunks += self._split(s, charset, targetlen, splitchars)
+ lastchunk, lastcharset = newchunks[-1]
+ lastlen = lastcharset.encoded_header_len(lastchunk)
+ return self._encode_chunks(newchunks, maxlinelen)
+
+
+\f
+def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
+ lines = []
+ maxlen = firstlen
+ for line in s.splitlines():
+ # Ignore any leading whitespace (i.e. continuation whitespace) already
+ # on the line, since we'll be adding our own.
+ line = line.lstrip()
+ if len(line) < maxlen:
+ lines.append(line)
+ maxlen = restlen
+ continue
+ # Attempt to split the line at the highest-level syntactic break
+ # possible. Note that we don't have a lot of smarts about field
+ # syntax; we just try to break on semi-colons, then commas, then
+ # whitespace.
+ for ch in splitchars:
+ if line.find(ch) >= 0:
+ break
+ else:
+ # There's nothing useful to split the line on, not even spaces, so
+ # just append this line unchanged
+ lines.append(line)
+ maxlen = restlen
+ continue
+ # Now split the line on the character plus trailing whitespace
+ cre = re.compile(r'%s\s*' % ch)
+ if ch in ';,':
+ eol = ch
+ else:
+ eol = ''
+ joiner = eol + ' '
+ joinlen = len(joiner)
+ wslen = len(continuation_ws.replace('\t', SPACE8))
+ this = []
+ linelen = 0
+ for part in cre.split(line):
+ curlen = linelen + max(0, len(this)-1) * joinlen
+ partlen = len(part)
+ onfirstline = not lines
+ # We don't want to split after the field name, if we're on the
+ # first line and the field name is present in the header string.
+ if ch == ' ' and onfirstline and \
+ len(this) == 1 and fcre.match(this[0]):
+ this.append(part)
+ linelen += partlen
+ elif curlen + partlen > maxlen:
+ if this:
+ lines.append(joiner.join(this) + eol)
+ # If this part is longer than maxlen and we aren't already
+ # splitting on whitespace, try to recursively split this line
+ # on whitespace.
+ if partlen > maxlen and ch <> ' ':
+ subl = _split_ascii(part, maxlen, restlen,
+ continuation_ws, ' ')
+ lines.extend(subl[:-1])
+ this = [subl[-1]]
+ else:
+ this = [part]
+ linelen = wslen + len(this[-1])
+ maxlen = restlen
+ else:
+ this.append(part)
+ linelen += partlen
+ # Put any left over parts on a line by themselves
+ if this:
+ lines.append(joiner.join(this))
+ return lines
+
+
+\f
+def _binsplit(splittable, charset, maxlinelen):
+ i = 0
+ j = len(splittable)
+ while i < j:
+ # Invariants:
+ # 1. splittable[:k] fits for all k <= i (note that we *assume*,
+ # at the start, that splittable[:0] fits).
+ # 2. splittable[:k] does not fit for any k > j (at the start,
+ # this means we shouldn't look at any k > len(splittable)).
+ # 3. We don't know about splittable[:k] for k in i+1..j.
+ # 4. We want to set i to the largest k that fits, with i <= k <= j.
+ #
+ m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
+ chunk = charset.from_splittable(splittable[:m], True)
+ chunklen = charset.encoded_header_len(chunk)
+ if chunklen <= maxlinelen:
+ # m is acceptable, so is a new lower bound.
+ i = m
+ else:
+ # m is not acceptable, so final i must be < m.
+ j = m - 1
+ # i == j. Invariant #1 implies that splittable[:i] fits, and
+ # invariant #2 implies that splittable[:i+1] does not fit, so i
+ # is what we're looking for.
+ first = charset.from_splittable(splittable[:i], False)
+ last = charset.from_splittable(splittable[i:], False)
+ return first, last
_encoder=None):
"""Create a text/* type MIME document.
- _text is the string for this message object. If the text does not end
- in a newline, one is added.
+ _text is the string for this message object.
_subtype is the MIME sub content type, defaulting to "plain".
"""
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
- if _text and not _text.endswith('\n'):
- _text += '\n'
self.set_payload(_text, _charset)
if _encoder is not None:
warnings.warn('_encoder argument is obsolete.',
"""
import re
+import uu
+import binascii
import warnings
from cStringIO import StringIO
from types import ListType, TupleType, StringType
# Intrapackage imports
-from email import Errors
from email import Utils
+from email import Errors
from email import Charset
SEMISPACE = '; '
the list object, you modify the message's payload in place. Optional
i returns that index into the payload.
- Optional decode is a flag (defaulting to False) indicating whether the
- payload should be decoded or not, according to the
- Content-Transfer-Encoding header. When True and the message is not a
- multipart, the payload will be decoded if this header's value is
- `quoted-printable' or `base64'. If some other encoding is used, or
- the header is missing, the payload is returned as-is (undecoded). If
- the message is a multipart and the decode flag is True, then None is
- returned.
+ Optional decode is a flag indicating whether the payload should be
+ decoded or not, according to the Content-Transfer-Encoding header
+ (default is False).
+
+ When True and the message is not a multipart, the payload will be
+ decoded if this header's value is `quoted-printable' or `base64'. If
+ some other encoding is used, or the header is missing, or if the
+ payload has bogus data (i.e. bogus base64 or uuencoded data), the
+ payload is returned as-is.
+
+ If the message is a multipart and the decode flag is True, then None
+ is returned.
"""
if i is None:
payload = self._payload
if decode:
if self.is_multipart():
return None
- cte = self.get('content-transfer-encoding', '')
- if cte.lower() == 'quoted-printable':
+ cte = self.get('content-transfer-encoding', '').lower()
+ if cte == 'quoted-printable':
return Utils._qdecode(payload)
- elif cte.lower() == 'base64':
- return Utils._bdecode(payload)
+ elif cte == 'base64':
+ try:
+ return Utils._bdecode(payload)
+ except binascii.Error:
+ # Incorrect padding
+ return payload
+ elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+ sfp = StringIO()
+ try:
+ uu.decode(StringIO(payload+'\n'), sfp)
+ payload = sfp.getvalue()
+ except uu.Error:
+ # Some decoding problem
+ return payload
# Everything else, including encodings with 8bit or 7bit are returned
# unchanged.
return payload
True = 1
False = 0
-nlcre = re.compile('\r\n|\r|\n')
+NLCRE = re.compile('\r\n|\r|\n')
\f
meaning it parses the entire contents of the file.
"""
root = self._class()
- self._parseheaders(root, fp)
+ firstbodyline = self._parseheaders(root, fp)
if not headersonly:
- self._parsebody(root, fp)
+ self._parsebody(root, fp, firstbodyline)
return root
def parsestr(self, text, headersonly=False):
lastheader = ''
lastvalue = []
lineno = 0
+ firstbodyline = None
while True:
# Don't strip the line before we test for the end condition,
# because whitespace-only header lines are RFC compliant
if i < 0:
if self._strict:
raise Errors.HeaderParseError(
- "Not a header, not a continuation: ``%s''"%line)
+ "Not a header, not a continuation: ``%s''" % line)
elif lineno == 1 and line.startswith('--'):
# allow through duplicate boundary tags.
continue
else:
- raise Errors.HeaderParseError(
- "Not a header, not a continuation: ``%s''"%line)
+ # There was no separating blank line as mandated by RFC
+ # 2822, but we're in non-strict mode. So just offer up
+ # this current line as the first body line.
+ firstbodyline = line
+ break
if lastheader:
container[lastheader] = NL.join(lastvalue)
lastheader = line[:i]
# Make sure we retain the last header
if lastheader:
container[lastheader] = NL.join(lastvalue)
+ return firstbodyline
- def _parsebody(self, container, fp):
+ def _parsebody(self, container, fp, firstbodyline=None):
# Parse the body, but first split the payload on the content-type
# boundary if present.
boundary = container.get_boundary()
# boundary.
separator = '--' + boundary
payload = fp.read()
+ if firstbodyline is not None:
+ payload = firstbodyline + '\n' + payload
# We use an RE here because boundaries can have trailing
# whitespace.
mo = re.search(
preamble = payload[0:start]
# Find out what kind of line endings we're using
start += len(mo.group('sep')) + len(mo.group('ws'))
- mo = nlcre.search(payload, start)
+ mo = NLCRE.search(payload, start)
if mo:
start += len(mo.group(0))
# We create a compiled regexp first because we need to be able to
# msgobj in this case is the "message/rfc822" container
msgobj = self.parsestr(parthdrs, headersonly=1)
# while submsgobj is the message itself
- submsgobj = self.parsestr(part)
- msgobj.attach(submsgobj)
msgobj.set_default_type('message/rfc822')
+ maintype = msgobj.get_content_maintype()
+ if maintype in ('message', 'multipart'):
+ submsgobj = self.parsestr(part)
+ msgobj.attach(submsgobj)
+ else:
+ msgobj.set_payload(part)
else:
msgobj = self.parsestr(part)
container.preamble = preamble
self._parsebody(msg, fp)
container.attach(msg)
else:
- container.set_payload(fp.read())
+ text = fp.read()
+ if firstbodyline is not None:
+ text = firstbodyline + '\n' + text
+ container.set_payload(text)
\f
Parsing with this subclass can be considerably faster if all you're
interested in is the message headers.
"""
- def _parsebody(self, container, fp):
+ def _parsebody(self, container, fp, firstbodyline=None):
# Consume but do not parse, the body
- container.set_payload(fp.read())
+ text = fp.read()
+ if firstbodyline is not None:
+ text = firstbodyline + '\n' + text
+ container.set_payload(text)
from cStringIO import StringIO
from types import ListType
-from rfc822 import quote
-from rfc822 import AddressList as _AddressList
-from rfc822 import mktime_tz
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
# We need workarounds for bugs in these methods in older Pythons (see below)
-from rfc822 import parsedate as _parsedate
-from rfc822 import parsedate_tz as _parsedate_tz
+from email._parseaddr import parsedate as _parsedate
+from email._parseaddr import parsedate_tz as _parsedate_tz
try:
True, False
UEMPTYSTRING = u''
CRLF = '\r\n'
-specialsre = re.compile(r'[][\()<>@,:;".]')
-escapesre = re.compile(r'[][\()"]')
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[][\\()"]')
\f
def _bdecode(s):
- if not s:
- return s
# We can't quite use base64.encodestring() since it tacks on a "courtesy
# newline". Blech!
if not s:
def decode_rfc2231(s):
"""Decode string according to RFC 2231"""
import urllib
- charset, language, s = s.split("'", 2)
- s = urllib.unquote(s)
- return charset, language, s
+ parts = s.split("'", 2)
+ if len(parts) == 1:
+ return None, None, s
+ charset, language, s = parts
+ return charset, language, urllib.unquote(s)
def encode_rfc2231(s, charset=None, language=None):
for num, continuation in continuations:
value.append(continuation)
charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
- new_params.append((name,
- (charset, language, '"%s"' % quote(value))))
+ new_params.append(
+ (name, (charset, language, '"%s"' % quote(value))))
return new_params
"""A package for parsing, handling, and generating email messages.
"""
-__version__ = '2.4.3'
+__version__ = '2.5'
__all__ = [
'base64MIME',
from cStringIO import StringIO
from types import StringType, UnicodeType
+False = 0
+True = 1
+
\f
# This function will become a method of the Message class
def _isstring(obj):
- return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
+ return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
\f
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
-def body_line_iterator(msg):
- """Iterate over the parts, returning string payloads line-by-line."""
+def body_line_iterator(msg, decode=False):
+ """Iterate over the parts, returning string payloads line-by-line.
+
+ Optional decode (default False) is passed through to .get_payload().
+ """
lines = []
for subpart in msg.walk():
- payload = subpart.get_payload()
+ payload = subpart.get_payload(decode=decode)
if _isstring(payload):
for line in StringIO(payload).readlines():
lines.append(line)
\f
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
-def body_line_iterator(msg):
- """Iterate over the parts, returning string payloads line-by-line."""
+def body_line_iterator(msg, decode=False):
+ """Iterate over the parts, returning string payloads line-by-line.
+
+ Optional decode (default False) is passed through to .get_payload().
+ """
for subpart in msg.walk():
- payload = subpart.get_payload()
+ payload = subpart.get_payload(decode=decode)
if _isstring(payload):
for line in StringIO(payload):
yield line
max_encoded = maxlinelen - len(charset) - MISC_LEN
max_unencoded = _floordiv(max_encoded * 3, 4)
- # BAW: Ben's original code used a step of max_unencoded, but I think it
- # ought to be max_encoded. Otherwise, where's max_encoded used? I'm
- # still not sure what the
for i in range(0, len(header), max_unencoded):
base64ed.append(b2a_base64(header[i:i+max_unencoded]))
def _max_append(L, s, maxlen, extra=''):
if not L:
L.append(s.lstrip())
- elif len(L[-1]) + len(s) < maxlen:
+ elif len(L[-1]) + len(s) <= maxlen:
L[-1] += extra + s
else:
L.append(s.lstrip())
=?charset?q?Silly_=C8nglish_Kn=EEghts?="
with each line wrapped safely at, at most, maxlinelen characters (defaults
- to 76 characters).
+ to 76 characters). If maxlinelen is None, the entire string is encoded in
+ one chunk with no splitting.
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
to the canonical email line separator \\r\\n unless the keep_eols
header = fix_eols(header)
# Quopri encode each line, in encoded chunks no greater than maxlinelen in
- # lenght, after the RFC chrome is added in.
+ # length, after the RFC chrome is added in.
quoted = []
- max_encoded = maxlinelen - len(charset) - MISC_LEN
+ if maxlinelen is None:
+ # An obnoxiously large number that's good enough
+ max_encoded = 100000
+ else:
+ max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
for c in header:
# Space may be represented as _ instead of =20 for readability
Content-Transfer-Encoding: 7bit
One
-
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Two
-
--BOUNDARY--
End of MIME message
-# Copyright (C) 2001,2002 Python Software Foundation
+# Copyright (C) 2001,2002,2003 Python Software Foundation
# email package unit tests
-import sys
import os
+import sys
import time
-import unittest
import base64
import difflib
+import unittest
+import warnings
from cStringIO import StringIO
from types import StringType, ListType
-import warnings
import email
# We don't care about DeprecationWarnings
warnings.filterwarnings('ignore', '', DeprecationWarning, __name__)
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
\f
-def openfile(filename):
+def openfile(filename, mode='r'):
path = os.path.join(os.path.dirname(landmark), 'data', filename)
- return open(path, 'r')
+ return open(path, mode)
\f
# Python 2.1
ndiffAssertEqual = unittest.TestCase.assertEqual
- def _msgobj(self, filename):
+ def _msgobj(self, filename, strict=False):
fp = openfile(findfile(filename))
try:
- msg = email.message_from_file(fp)
+ msg = email.message_from_file(fp, strict=strict)
finally:
fp.close()
return msg
eq = self.assertEqual
msg = self._msgobj('msg_10.txt')
# The outer message is a multipart
- eq(msg.get_payload(decode=1), None)
+ eq(msg.get_payload(decode=True), None)
# Subpart 1 is 7bit encoded
- eq(msg.get_payload(0).get_payload(decode=1),
+ eq(msg.get_payload(0).get_payload(decode=True),
'This is a 7bit encoded message.\n')
# Subpart 2 is quopri
- eq(msg.get_payload(1).get_payload(decode=1),
+ eq(msg.get_payload(1).get_payload(decode=True),
'\xa1This is a Quoted Printable encoded message!\n')
# Subpart 3 is base64
- eq(msg.get_payload(2).get_payload(decode=1),
+ eq(msg.get_payload(2).get_payload(decode=True),
'This is a Base64 encoded message.')
# Subpart 4 has no Content-Transfer-Encoding: header.
- eq(msg.get_payload(3).get_payload(decode=1),
+ eq(msg.get_payload(3).get_payload(decode=True),
'This has no Content-Transfer-Encoding: header.\n')
+    def test_get_decoded_uu_payload(self):
+        # Every spelling of the uuencode Content-Transfer-Encoding must make
+        # get_payload(decode=True) return the decoded text.
+        eq = self.assertEqual
+        msg = Message()
+        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
+        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+            # Assigning a header appends to the message instead of replacing
+            # an existing one, and lookups return the first match.  Remove
+            # the previous header first, otherwise only 'x-uuencode' would
+            # ever be exercised.  Deleting a missing header is a no-op.
+            del msg['content-transfer-encoding']
+            msg['content-transfer-encoding'] = cte
+            eq(msg.get_payload(decode=True), 'hello world')
+        # Now try some bogus data
+        msg.set_payload('foo')
+        eq(msg.get_payload(decode=True), 'foo')
+
def test_decoded_generator(self):
eq = self.assertEqual
msg = self._msgobj('msg_07.txt')
eq(msg.get_param('charset'), 'iso-2022-jp')
msg.set_param('importance', 'high value')
eq(msg.get_param('importance'), 'high value')
- eq(msg.get_param('importance', unquote=0), '"high value"')
+ eq(msg.get_param('importance', unquote=False), '"high value"')
eq(msg.get_params(), [('text/plain', ''),
('charset', 'iso-2022-jp'),
('importance', 'high value')])
- eq(msg.get_params(unquote=0), [('text/plain', ''),
+ eq(msg.get_params(unquote=False), [('text/plain', ''),
('charset', '"iso-2022-jp"'),
('importance', '"high value"')])
msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
+    def test_broken_base64_payload(self):
+        # A payload labelled base64 that does not decode is expected to come
+        # back from get_payload(decode=True) exactly as it was stored.
+        broken = 'AwDp0P7//y6LwKEAcPa/6Q=9'
+        msg = Message()
+        for name, value in (('content-type', 'audio/x-midi'),
+                            ('content-transfer-encoding', 'base64')):
+            msg[name] = value
+        msg.set_payload(broken)
+        decoded = msg.get_payload(decode=True)
+        self.assertEqual(decoded, broken)
+
\f
# Test the email.Encoders module
def test_encode_noop(self):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_noop)
- eq(msg.get_payload(), 'hello world\n')
+ eq(msg.get_payload(), 'hello world')
def test_encode_7bit(self):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_7or8bit)
- eq(msg.get_payload(), 'hello world\n')
+ eq(msg.get_payload(), 'hello world')
eq(msg['content-transfer-encoding'], '7bit')
msg = MIMEText('hello \x7f world', _encoder=Encoders.encode_7or8bit)
- eq(msg.get_payload(), 'hello \x7f world\n')
+ eq(msg.get_payload(), 'hello \x7f world')
eq(msg['content-transfer-encoding'], '7bit')
def test_encode_8bit(self):
eq = self.assertEqual
msg = MIMEText('hello \x80 world', _encoder=Encoders.encode_7or8bit)
- eq(msg.get_payload(), 'hello \x80 world\n')
+ eq(msg.get_payload(), 'hello \x80 world')
eq(msg['content-transfer-encoding'], '8bit')
def test_encode_empty_payload(self):
def test_encode_base64(self):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_base64)
- eq(msg.get_payload(), 'aGVsbG8gd29ybGQK\n')
+ eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=')
eq(msg['content-transfer-encoding'], 'base64')
def test_encode_quoted_printable(self):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_quopri)
- eq(msg.get_payload(), 'hello=20world\n')
+ eq(msg.get_payload(), 'hello=20world')
eq(msg['content-transfer-encoding'], 'quoted-printable')
def test_default_cte(self):
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
- h = Header(g_head, g)
+ h = Header(g_head, g, header_name='Subject')
h.append(cz_head, cz)
h.append(utf8_head, utf8)
msg = Message()
sfp = StringIO()
g = Generator(sfp)
g.flatten(msg)
- eq(sfp.getvalue(), '''\
-Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=
+ eq(sfp.getvalue(), """\
+Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
+ =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
+ =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
+ =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
+ =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
+ =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
+ =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
+ =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
+ =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
+ =?utf-8?b?44Gm44GE44G+44GZ44CC?=
-''')
- eq(h.encode(), '''\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=''')
+""")
+ eq(h.encode(), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
+ =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
+ =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
+ =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
+ =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
+ =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
+ =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
+ =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
+ =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
+ =?utf-8?b?44Gm44GE44G+44GZ44CC?=""")
def test_long_header_encode(self):
eq = self.ndiffAssertEqual
def test_long_8bit_header(self):
eq = self.ndiffAssertEqual
msg = Message()
- h = Header('Britische Regierung gibt', 'iso-8859-1')
+ h = Header('Britische Regierung gibt', 'iso-8859-1',
+ header_name='Subject')
h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
msg['Subject'] = h
eq(msg.as_string(), """\
-Subject: =?iso-8859-1?q?Britische_Regierung_gibt?=
- =?iso-8859-1?q?gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?=
+ =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?=
""")
eq(msg.as_string(), """\
Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>
+""")
+
+ def test_long_to_header(self):
+ eq = self.ndiffAssertEqual
+ to = '"Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,"Someone Test #B" <someone@umich.edu>, "Someone Test #C" <someone@eecs.umich.edu>, "Someone Test #D" <someone@eecs.umich.edu>'
+ msg = Message()
+ msg['To'] = to
+ eq(msg.as_string(0), '''\
+To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>,
+\t"Someone Test #B" <someone@umich.edu>,
+\t"Someone Test #C" <someone@eecs.umich.edu>,
+\t"Someone Test #D" <someone@eecs.umich.edu>
+
+''')
+
+ def test_long_line_after_append(self):
+ eq = self.ndiffAssertEqual
+ s = 'This is an example of string which has almost the limit of header length.'
+ h = Header(s)
+ h.append('Add another line.')
+ eq(h.encode(), """\
+This is an example of string which has almost the limit of header length.
+ Add another line.""")
+
+    def test_shorter_line_with_append(self):
+        # Two short chunks are expected on one line, joined by one space
+        header = Header('This is a shorter line.')
+        header.append('Add another sentence. (Surprise?)')
+        self.ndiffAssertEqual(
+            header.encode(),
+            'This is a shorter line. Add another sentence. (Surprise?)')
+
+ def test_long_field_name(self):
+ eq = self.ndiffAssertEqual
+ fn = 'X-Very-Very-Very-Long-Header-Name'
+ gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
+ h = Header(gs, 'iso-8859-1', header_name=fn)
+ # BAW: this seems broken because the first line is too long
+ eq(h.encode(), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_?=
+ =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?=
+ =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?=
+ =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
+
+ def test_long_received_header(self):
+ h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700'
+ msg = Message()
+ msg['Received-1'] = Header(h, continuation_ws='\t')
+ msg['Received-2'] = h
+ self.assertEqual(msg.as_string(), """\
+Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+\tWed, 05 Mar 2003 18:10:18 -0700
+Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+\tWed, 05 Mar 2003 18:10:18 -0700
+
+""")
+
+ def test_string_headerinst_eq(self):
+ h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")'
+ msg = Message()
+ msg['Received-1'] = Header(h, header_name='Received-1',
+ continuation_ws='\t')
+ msg['Received-2'] = h
+ self.assertEqual(msg.as_string(), """\
+Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
+\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
+Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
+\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
+
+""")
+
+ def test_long_unbreakable_lines_with_continuation(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ t = """\
+ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
+ msg['Face-1'] = t
+ msg['Face-2'] = Header(t, header_name='Face-2')
+ eq(msg.as_string(), """\
+Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+\tlocQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+
+""")
+
+ def test_another_long_multiline_header(self):
+ eq = self.ndiffAssertEqual
+ m = '''\
+Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
+ Wed, 16 Oct 2002 07:41:11 -0700'''
+ msg = email.message_from_string(m)
+ eq(msg.as_string(), '''\
+Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
+ Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
+
+''')
+
+ def test_long_lines_with_different_header(self):
+ eq = self.ndiffAssertEqual
+ h = """\
+List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>"""
+ msg = Message()
+ msg['List'] = h
+ msg['List'] = Header(h, header_name='List')
+ eq(msg.as_string(), """\
+List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+
""")
def test_mangled_from(self):
s = StringIO()
- g = Generator(s, mangle_from_=1)
+ g = Generator(s, mangle_from_=True)
g.flatten(self.msg)
self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org
def test_dont_mangle_from(self):
s = StringIO()
- g = Generator(s, mangle_from_=0)
+ g = Generator(s, mangle_from_=False)
g.flatten(self.msg)
self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
def setUp(self):
- # In Python, audiotest.au lives in Lib/test not Lib/test/data
- fp = open(findfile('audiotest.au'), 'rb')
+ # Make sure we pick up the audiotest.au that lives in email/test/data.
+ # In Python, there's an audiotest.au living in Lib/test but that isn't
+ # included in some binary distros that don't include the test
+ # package. The trailing empty string on the .join() is significant
+ # since findfile() will do a dirname().
+ datadir = os.path.join(os.path.dirname(landmark), 'data', '')
+ fp = open(findfile('audiotest.au', datadir), 'rb')
try:
self._audiodata = fp.read()
finally:
is missing)
def test_payload(self):
- self.assertEqual(self._msg.get_payload(), 'hello there\n')
+ self.assertEqual(self._msg.get_payload(), 'hello there')
self.failUnless(not self._msg.is_multipart())
def test_charset(self):
\f
# Test a more complicated multipart/mixed type message
-class TestMultipartMixed(unittest.TestCase):
+class TestMultipartMixed(TestEmailBase):
def setUp(self):
fp = openfile('PyBanner048.gif')
try:
''')
def test_one_part_in_a_multipart(self):
+ eq = self.ndiffAssertEqual
outer = MIMEBase('multipart', 'mixed')
outer['Subject'] = 'A subject'
outer['To'] = 'aperson@dom.ain'
outer.set_boundary('BOUNDARY')
msg = MIMEText('hello world')
outer.attach(msg)
- self.assertEqual(outer.as_string(), '''\
+ eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
Content-Transfer-Encoding: 7bit
hello world
-
--BOUNDARY--
''')
def test_seq_parts_in_a_multipart(self):
+ eq = self.ndiffAssertEqual
outer = MIMEBase('multipart', 'mixed')
outer['Subject'] = 'A subject'
outer['To'] = 'aperson@dom.ain'
msg = MIMEText('hello world')
outer.attach(msg)
outer.set_boundary('BOUNDARY')
- self.assertEqual(outer.as_string(), '''\
+ eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
Content-Transfer-Encoding: 7bit
hello world
-
--BOUNDARY--
''')
data = fp.read()
finally:
fp.close()
- p = Parser(strict=1)
+ p = Parser(strict=True)
# Note, under a future non-strict parsing mode, this would parse the
# message into the intended message tree.
self.assertRaises(Errors.BoundaryError, p.parsestr, data)
--BOUNDARY--
""")
+ def test_no_separating_blank_line(self):
+ eq = self.ndiffAssertEqual
+ msg = self._msgobj('msg_35.txt')
+ eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: here's something interesting
+
+counter to RFC 2822, there's no separating newline here
+""")
+ # strict=True should raise an exception
+ self.assertRaises(Errors.HeaderParseError,
+ self._msgobj, 'msg_35.txt', True)
+
\f
# Test RFC 2047 header encoding and decoding
eq(Utils.encode(s2, charset='iso-8859-2', encoding='b'),
'=?iso-8859-2?b?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=')
+ def test_rfc2047_multiline(self):
+ eq = self.assertEqual
+ s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
+ foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
+ dh = decode_header(s)
+ eq(dh, [
+ ('Re:', None),
+ ('r\x8aksm\x9arg\x8cs', 'mac-iceland'),
+ ('baz foo bar', None),
+ ('r\x8aksm\x9arg\x8cs', 'mac-iceland')])
+ eq(str(make_header(dh)),
+ """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar
+ =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""")
+
+ def test_whitespace_eater_unicode(self):
+ eq = self.assertEqual
+ s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
+ dh = decode_header(s)
+ eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
+ # Python 2.1's unicode() builtin doesn't call the object's
+ # __unicode__() method. Use the following alternative instead.
+ #hu = unicode(make_header(dh)).encode('latin-1')
+ hu = make_header(dh).__unicode__().encode('latin-1')
+ eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
+
\f
# Test the MIMEMessage class
'<002001c144a6$8752e060$56104586@oxy.edu>')
def test_epilogue(self):
+ eq = self.ndiffAssertEqual
fp = openfile('msg_21.txt')
try:
text = fp.read()
sfp = StringIO()
g = Generator(sfp)
g.flatten(msg)
- self.assertEqual(sfp.getvalue(), text)
+ eq(sfp.getvalue(), text)
+
+ def test_no_nl_preamble(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'aperson@dom.ain'
+ msg['To'] = 'bperson@dom.ain'
+ msg['Subject'] = 'Test'
+ msg.preamble = 'MIME message'
+ msg.epilogue = ''
+ msg1 = MIMEText('One')
+ msg2 = MIMEText('Two')
+ msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
+ msg.attach(msg1)
+ msg.attach(msg2)
+ eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: Test
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+MIME message
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+One
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+Two
+--BOUNDARY--
+""")
def test_default_type(self):
eq = self.assertEqual
msg, text = self._msgobj('msg_33.txt')
self._idempotent(msg, text)
+    def test_text_plain_in_a_multipart_digest(self):
+        # Run the idempotency check over msg_34.txt
+        message, source = self._msgobj('msg_34.txt')
+        self._idempotent(message, source)
+
def test_content_type(self):
eq = self.assertEquals
unless = self.failUnless
def test_formatdate_localtime(self):
now = time.time()
self.assertEqual(
- Utils.parsedate(Utils.formatdate(now, localtime=1))[:6],
+ Utils.parsedate(Utils.formatdate(now, localtime=True))[:6],
time.localtime(now)[:6])
def test_parsedate_none(self):
self.assertEqual(Utils.parsedate(''), None)
+    def test_parsedate_compact(self):
+        # The FWS after the comma is optional, so both spellings have to
+        # parse to the same result.
+        compact = Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800')
+        spaced = Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')
+        self.assertEqual(compact, spaced)
+
def test_parseaddr_empty(self):
self.assertEqual(Utils.parseaddr('<>'), ('', ''))
self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
b = 'person@dom.ain'
self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+    def test_escape_backslashes(self):
+        # The name is spelled once, as a raw string, so the test does not
+        # depend on '\B' and '\ ' happening not to be escape sequences.
+        a = r'Arthur \Backslash\ Foobar'
+        b = 'person@dom.ain'
+        # formataddr() has to double each backslash and quote the name ...
+        self.assertEqual(
+            Utils.formataddr((a, b)),
+            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
+        # ... and parseaddr() has to undo that again.
+        self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+
+    def test_name_with_dot(self):
+        bare = 'John X. Doe <jxd@example.com>'
+        quoted = '"John X. Doe" <jxd@example.com>'
+        expected = ('John X. Doe', 'jxd@example.com')
+        # The name parses the same whether or not it was quoted
+        for text in (bare, quoted):
+            self.assertEqual(Utils.parseaddr(text), expected)
+        # formataddr() quotes the name if there's a dot in it
+        self.assertEqual(Utils.formataddr(expected), quoted)
+
def test_quote_dump(self):
self.assertEqual(
Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+    def test_getaddresses_nasty(self):
+        # Each case pairs the field values handed to getaddresses() with the
+        # (name, address) pairs it is expected to return.
+        cases = [
+            (['foo: ;'],
+             [('', '')]),
+            (['[]*-- =~$'],
+             [('', ''), ('', ''), ('', '*--')]),
+            (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
+             [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
+            ]
+        for fieldvalues, expected in cases:
+            self.assertEqual(Utils.getaddresses(fieldvalues), expected)
+
def test_utils_quote_unquote(self):
eq = self.assertEqual
msg = Message()
eq(msg.get_payload(), "Here's the message body\n")
def test_crlf_separation(self):
- if sys.platform == 'mac':
- # Skipped in MacPython 2.2.X due to line-end problems
- return
eq = self.assertEqual
- fp = openfile('msg_26.txt')
+ fp = openfile('msg_26.txt', mode='rb')
try:
msg = Parser().parse(fp)
finally:
# Test the charset option
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
# Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=1),
+ eq(he('hello\nworld', keep_eols=True),
'=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
# Test the maxlinelen argument
eq(he('xxxx ' * 20, maxlinelen=40), """\
# Test the charset option
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
# Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=')
+ eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=')
# Test a non-ASCII character
eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
# Test the maxlinelen argument
\f
# Test the Charset class
class TestCharset(unittest.TestCase):
+    def tearDown(self):
+        # Remove the 'fake' entry from the charset registry if it is there;
+        # a missing entry is not an error.
+        from email import Charset as CharsetModule
+        registry = CharsetModule.CHARSETS
+        try:
+            del registry['fake']
+        except KeyError:
+            pass
+
def test_idempotent(self):
eq = self.assertEqual
# Make sure us-ascii = no Unicode conversion
sp = c.to_splittable(s)
eq(s, c.from_splittable(sp))
+    def test_body_encode(self):
+        eq = self.assertEqual
+        # A charset with QP body encoding
+        qp_charset = Charset('iso-8859-1')
+        eq('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
+        # A charset with Base64 body encoding
+        b64_charset = Charset('utf-8')
+        eq('aGVsbG8gd29ybGQ=\n', b64_charset.body_encode('hello world'))
+        # A charset with None body encoding
+        plain_charset = Charset('us-ascii')
+        eq('hello world', plain_charset.body_encode('hello world'))
+        # Try the convert argument, where input codec <> output codec
+        jp_charset = Charset('euc-jp')
+        # With apologies to Tokio Kikuchi ;)
+        euc_text = '\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'
+        try:
+            eq('\x1b$B5FCO;~IW\x1b(B', jp_charset.body_encode(euc_text))
+            eq(euc_text, jp_charset.body_encode(euc_text, False))
+        except LookupError:
+            # We probably don't have the Japanese codecs installed
+            pass
+        # Testing SF bug #625509, which we have to fake, since there are no
+        # built-in encodings where the header encoding is QP but the body
+        # encoding is not.
+        from email import Charset as CharsetModule
+        CharsetModule.add_charset('fake', CharsetModule.QP, None)
+        fake_charset = Charset('fake')
+        eq('hello w\xf6rld', fake_charset.body_encode('hello w\xf6rld'))
+
\f
# Test multilingual MIME headers.
h = Header('Hello World!')
eq(h.encode(), 'Hello World!')
h.append(' Goodbye World!')
- eq(h.encode(), 'Hello World! Goodbye World!')
+ eq(h.encode(), 'Hello World! Goodbye World!')
def test_simple_surprise(self):
eq = self.ndiffAssertEqual
h = Header('Hello World!')
eq(h.encode(), 'Hello World!')
h.append('Goodbye World!')
- eq(h.encode(), 'Hello World!Goodbye World!')
+ eq(h.encode(), 'Hello World! Goodbye World!')
def test_header_needs_no_decoding(self):
h = 'no decoding needed'
def test_long(self):
h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
maxlinelen=76)
- for l in h.encode().split('\n '):
+ for l in h.encode(splitchars=' ').split('\n '):
self.failUnless(len(l) <= 76)
def test_multilingual(self):
h.append(cz_head, cz)
h.append(utf8_head, utf8)
enc = h.encode()
- eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
- =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
- =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
- =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
- =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
- =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
- =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
- =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
- =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
- =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
- =?utf-8?q?s_Nunstuck_git_und?=
- =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?=
- =?utf-8?q?r_die_Flipperwaldt?=
- =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
+ eq(enc, """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?=
+ =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?=
+ =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?=
+ =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
+ =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
+ =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
+ =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
+ =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?=
+ =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?=
+ =?utf-8?b?44CC?=""")
eq(decode_header(enc),
[(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
(utf8_head, "utf-8")])
h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
+    def test_bad_8bit_header(self):
+        text = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+        # With the default error handling the text is rejected, both by the
+        # constructor and by append()
+        self.assertRaises(UnicodeError, Header, text)
+        header = Header()
+        self.assertRaises(UnicodeError, header.append, text)
+        # With errors='replace' it is accepted, and str() gives it back as-is
+        replaced = Header(text, errors='replace')
+        self.assertEqual(str(replaced), text)
+        header.append(text, errors='replace')
+        self.assertEqual(str(header), text)
+
+    def test_encoded_adjacent_nonencoded(self):
+        # A chunk with an explicit charset followed by one without
+        header = Header()
+        header.append('hello', 'iso-8859-1')
+        header.append('world')
+        encoded = header.encode()
+        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
+        # Decoding and rebuilding the header must give back the same text
+        rebuilt = make_header(decode_header(encoded))
+        self.assertEqual(rebuilt.encode(), encoded)
+
+ def test_whitespace_eater(self):
+ eq = self.assertEqual
+ s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
+ parts = decode_header(s)
+ eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
+ hdr = make_header(parts)
+ eq(hdr.encode(),
+ 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
+
+    def test_broken_base64_header(self):
+        # decode_header() is expected to reject this base64 encoded-word
+        # with a HeaderParseError.
+        broken = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
+        self.assertRaises(Errors.HeaderParseError, decode_header, broken)
+
\f
# Test RFC 2231 header parameters (en/de)coding
msg = self._msgobj('msg_29.txt')
eq(msg.get_param('title'),
('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
- eq(msg.get_param('title', unquote=0),
+ eq(msg.get_param('title', unquote=False),
('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
def test_set_param(self):
msg = self._msgobj('msg_32.txt')
eq(msg.get_content_charset(), 'us-ascii')
+ def test_rfc2231_no_language_or_charset(self):
+ m = '''\
+Content-Transfer-Encoding: 8bit
+Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
+Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_param('NAME'),
+ (None, None, 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm'))
+
\f
def _testclasses():
ghello = 'Gr\xfc\xdf Gott!'
h.append(jhello, j)
h.append(ghello, g)
- eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
+ # BAW: This used to -- and maybe should -- fold the two iso-8859-1
+ # chunks into a single encoded word. However it doesn't violate the
+ # standard to have them as two encoded chunks and maybe it's
+ # reasonable <wink> for each .append() call to result in a separate
+ # encoded word.
+ eq(h.encode(), """\
+Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
+ =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
eq(decode_header(h.encode()),
[('Hello World!', None),
('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
h = Header(long, j, header_name="Subject")
# test a very long header
enc = h.encode()
- # BAW: The following used to pass. Sadly, the test afterwards is what
- # happens now. I've no idea which is right. Please, any Japanese and
- # RFC 2047 experts, please verify!
-## eq(enc, '''\
-##=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
-## =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=
-## =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=''')
- eq(enc, """\
-=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=
- =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NUcnJHJCVCRDJEYkJCReJDkbKEI=?=""")
- # BAW: same deal here. :(
-## self.assertEqual(
-## decode_header(enc),
-## [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
- self.assertEqual(
- decode_header(enc),
- [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5G'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
+ # TK: splitting point may differ by codec design and/or Header encoding
+ eq(enc , """\
+=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
+ =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
+ # TK: full decode comparison
+ eq(h.__unicode__().encode('euc-jp'), long)
\f