# tuple of (regex, fn)
# - where fn returns a (name, email) tuple from the match groups resulting
# from re.match().groups()
- # TODO(stephenfin): Perhaps we should check for "real" email addresses
- # instead of anything ('.*?')
from_res = [
# for "Firstname Lastname" <example@example.com> style addresses
(re.compile(r'"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
(name, email) = fn(match.groups())
break
+ # Checking for real email address.
+ email_pattern = (
+ r'^[\w!#$%&"*+/=?^`{|}~-]+(?:\.[\w!#$%&"*+/=?^`{|}~-]+)*'
+ r'@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+'
+ r'[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$'
+ )
+ if not re.match(email_pattern, email):
+ email = None
return (name, email)
with self.assertRaises(ValueError):
get_or_create_author(email)
+ def test_invalid_email(self):
+ # Broken mail clients garble the From header; expect ValueError -- #512
+ from_header = 'testH=?UTF-8?B?w6E=?=user=?UTF-8?B?w6E=?='
+ email = self._create_email(from_header)
+ with self.assertRaises(ValueError):
+ get_or_create_author(email)
+
def test_ascii_encoding(self):
from_header = 'example user <user@example.com>'
sender_name = 'example user'