I am re-submitting an older PR which was abandoned but is still relevant, #10783 by @timb07.
The issue being solved is still relevant. The original PR #10783 was closed because
the final requested changes were not applied, and it has since been abandoned.
In this new PR I have reused the original patch and addressed both review comments, from @maxking and @pganssle.
For reference, here is the original PR description:
In email.utils.parsedate_to_datetime(), a failure to parse the date, or invalid date components (such as hour outside 0..23) raises an exception. Document this behaviour, and add tests to test_email/test_utils.py to confirm this behaviour.
In email.headerregistry.DateHeader.parse(), check when parsedate_to_datetime() raises an exception and add a new defect InvalidDateDefect; preserve the invalid value as the string value of the header, but set the datetime attribute to None.
Add tests to test_email/test_headerregistry.py to confirm this behaviour; also add a test to test_email/test_inversion.py to confirm that emails with such defective date headers round-trip successfully.
This pull request incorporates feedback gratefully received from @bitdancer, @brettcannon, @Mariatta and @warsaw, and replaces the earlier PR #2254.
Automerge-Triggered-By: GH:warsaw
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
bytes, the number of non-padding base64 characters was invalid (1 more than
a multiple of 4). The encoded block was kept as-is.
+
+* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field.
+ The original value is kept as-is.
\ No newline at end of file
.. function:: parsedate_to_datetime(date)
The inverse of :func:`format_datetime`. Performs the same function as
- :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If
- the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
+ :func:`parsedate`, but on success returns a :class:`~datetime.datetime`;
+ otherwise :exc:`ValueError` is raised if *date* contains an invalid value such
+ as an hour greater than 23 or a timezone offset not between -24 and 24 hours.
+ If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
``datetime``, and if the date is conforming to the RFCs it will represent a
time in UTC but with no indication of the actual source timezone of the
message the date comes from. If the input date has any other valid timezone
"""
if not data:
- return
+ return None
data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and
# adjust for this.
"""local_part contains non-ASCII characters"""
# This defect only occurs during unicode parsing, not when
# parsing messages decoded from binary.
+
+class InvalidDateDefect(HeaderDefect):  # registered when a date header value cannot be parsed
+ """Header has unparseable or invalid date"""
kwds['parse_tree'] = parser.TokenList()
return
if isinstance(value, str):
- value = utils.parsedate_to_datetime(value)
+ kwds['decoded'] = value
+ try:
+ value = utils.parsedate_to_datetime(value)
+ except ValueError:
+ kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
+ kwds['datetime'] = None
+ kwds['parse_tree'] = parser.TokenList()
+ return
kwds['datetime'] = value
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
def parsedate_to_datetime(data):
- *dtuple, tz = _parsedate_tz(data)
+ parsed_date_tz = _parsedate_tz(data)
+ if parsed_date_tz is None:
+ raise ValueError('Invalid date value or format "%s"' % str(data))
+ *dtuple, tz = parsed_date_tz
if tz is None:
return datetime.datetime(*dtuple[:6])
return datetime.datetime(*dtuple[:6],
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)
+ def test_invalid_date_format(self):
+ s = 'Not a date header'  # free text that cannot be parsed as a date at all
+ h = self.make_header('date', s)
+ self.assertEqual(h, s)  # the header's string value must stay equal to the raw input
+ self.assertIsNone(h.datetime)  # no datetime is available for an unparseable value
+ self.assertEqual(len(h.defects), 1)
+ self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
+
+ def test_invalid_date_value(self):
+ s = 'Tue, 06 Jun 2017 27:39:33 +0600'  # well-formed layout, but hour 27 is out of range
+ h = self.make_header('date', s)
+ self.assertEqual(h, s)  # the header's string value must stay equal to the raw input
+ self.assertIsNone(h.datetime)  # no datetime is available for an invalid value
+ self.assertEqual(len(h.defects), 1)
+ self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
+
def test_datetime_read_only(self):
h = self.make_header('date', self.datestring)
with self.assertRaises(AttributeError):
foo
"""),),
+ 'header_with_invalid_date': (dedent(b"""\
+ Date: Tue, 06 Jun 2017 27:39:33 +0600
+ From: abc@xyz.com
+ Subject: timezones
+
+ How do they work even?
+ """),),
+
}
payload_params = {
utils.parsedate_to_datetime(self.datestring + ' -0000'),
self.naive_dt)
+    def test_parsedate_to_datetime_with_invalid_raises_valueerror(self):
+        # Every entry must make parsedate_to_datetime() raise ValueError:
+        # empty or unparseable text, an hour above 23, or a timezone offset
+        # outside the -24..24 hour range.  Keep one literal per line, each
+        # followed by a comma, so adjacent strings are never silently
+        # concatenated into a single test case.
+        invalid_dates = [
+            '',
+            '0',
+            'A Complete Waste of Time',
+            'Tue, 06 Jun 2017 27:39:33 +0600',
+            'Tue, 06 Jun 2017 07:39:33 +2600',
+            'Tue, 06 Jun 2017 27:39:33',
+        ]
+        for dtstr in invalid_dates:
+            with self.subTest(dtstr=dtstr):
+                self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr)
class LocaltimeTests(unittest.TestCase):
--- /dev/null
+Handle exceptions caused by unparseable date headers when using the email
+"default" policy. Patch by Tim Bell and Georges Toth.