Email generators using email.policy.default may convert an RFC 2047
encoded-word to unencoded form during header refolding. In a structured
header, this could allow 'specials' characters outside a quoted-string,
leading to invalid address headers and enabling spoofing. This change
ensures a parsed encoded-word that contains specials is kept as an
encoded-word while the header is refolded.
[Better fix from @bitdancer.]
(cherry picked from commit
295b53df2aa18deb625a7da41f7e4babfe6ef34b)
Co-authored-by: Mike Edmunds <medmunds@gmail.com>
Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
return fws, newvalue
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
"""
ew.append(token)
continue
chars, *remainder = _wsp_splitter(text, 1)
- vtext = ValueTerminal(chars, 'vtext')
+ vtext = ValueTerminal(chars, terminal_type)
_validate_xtext(vtext)
ew.append(vtext)
text = ''.join(remainder)
valid_ew = True
if value.startswith('=?'):
try:
- token, value = get_encoded_word(value)
+ token, value = get_encoded_word(value, 'utext')
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError:
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
- vtext = ValueTerminal(tok, 'vtext')
+ vtext = ValueTerminal(tok, 'utext')
_validate_xtext(vtext)
unstructured.append(vtext)
value = ''.join(remainder)
continue
tstr = str(part)
if not want_encoding:
- if part.token_type == 'ptext':
+ if part.token_type in ('ptext', 'vtext'):
# Encode if tstr contains special characters.
want_encoding = not SPECIALSNL.isdisjoint(tstr)
else:
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
+ def test_address_list_with_specials_in_encoded_word(self):
+ # An encoded-word parsed from a structured header must remain
+ # encoded when it contains specials. Regression for gh-121284.
+ policy = self.policy.clone(max_line_length=40)
+ cases = [
+ # (to, folded)
+ ('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
+ 'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
+ ' =?utf-8?q?=2C?= comma <to@example.com>\n'),
+ ('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
+ 'This long name does not need\n'
+ ' encoded-word <to@example.com>\n'),
+ ('"A véry long name with, comma" <to@example.com>',
+ # (This isn't the best fold point, but it's not invalid.)
+ 'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
+ ' =?utf-8?q?=2C?= comma <to@example.com>\n'),
+ ('"A véry long name containing a, comma" <to@example.com>',
+ 'A =?utf-8?q?v=C3=A9ry?= long name\n'
+ ' containing =?utf-8?q?a=2C?= comma\n'
+ ' <to@example.com>\n'),
+ ]
+ for (to, folded) in cases:
+ with self.subTest(to=to):
+ self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
def test_address_list_with_list_separator_after_fold(self):
a = 'x' * 66 + '@example.com'
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
--- /dev/null
+Fix a bug in the folding of RFC 2047 encoded-words when flattening an email message
+using a modern email policy. Previously, when an encoded-word was too long
+for a line, it would be decoded, split across lines, and re-encoded. But commas
+and other special characters in the original text could be left unencoded and
+unquoted. This could theoretically be used to spoof header lines using
+a carefully constructed encoded-word if the resulting rendered email was
+transmitted or re-parsed.