return _find_series_by_markers(project, mail, author)
-def get_or_create_author(mail):
- from_header = clean_header(mail.get('From'))
-
- if not from_header:
- raise ValueError("Invalid 'From' header")
-
+def split_from_header(from_header):
name, email = (None, None)
# tuple of (regex, fn)
(name, email) = fn(match.groups())
break
+ return (name, email)
+
+
+# Unmangle From addresses that have been mangled for DMARC purposes.
+#
+# To avoid triggering spam filters due to failed signature validation, many
+# mailing lists mangle the From header to change the From address to be the
+# address of the list, typically where the sender's domain has a strict
+# DMARC policy enabled.
+#
+# Unfortunately, there's no standardised way of preserving the original
+# From address.
+#
+# Google Groups adds an X-Original-From header. If present, we use that.
+#
+# Mailman preserves the original address by adding a Reply-To, except in the
+# case where the list is set to either reply to list, or reply to a specific
+# address, in which case the original From is added to Cc instead. These corner
+# cases are dumb, but we try and handle things as sensibly as possible by
+# looking for a name in Reply-To/Cc that matches From. It's inexact but should
+# be good enough for our purposes.
+def get_original_sender(mail, name, email):
+ if name and ' via ' in name:
+ # Mailman uses the format "<name> via <list>"
+ # Google Groups uses "'<name>' via <list>"
+ stripped_name = name[:name.rfind(' via ')].strip().strip("'")
+
+ original_from = clean_header(mail.get('X-Original-From', ''))
+ if original_from:
+ new_email = split_from_header(original_from)[1].strip()[:255]
+ return (stripped_name, new_email)
+
+ addrs = []
+ reply_to_headers = mail.get_all('Reply-To') or []
+ cc_headers = mail.get_all('Cc') or []
+ for header in reply_to_headers + cc_headers:
+ header = clean_header(header)
+ addrs = header.split(",")
+ for addr in addrs:
+ new_name, new_email = split_from_header(addr)
+ if new_name:
+ new_name = new_name.strip()[:255]
+ if new_email:
+ new_email = new_email.strip()[:255]
+ if new_name == stripped_name:
+ return (stripped_name, new_email)
+
+ # If we can't figure out the original sender, just keep it as is
+ return (name, email)
+
+
+def get_or_create_author(mail, project=None):
+ from_header = clean_header(mail.get('From'))
+
+ if not from_header:
+ raise ValueError("Invalid 'From' header")
+
+ name, email = split_from_header(from_header)
+
if not email:
raise ValueError("Invalid 'From' header")
if name is not None:
name = name.strip()[:255]
+ if project and email.lower() == project.listemail.lower():
+ name, email = get_original_sender(mail, name, email)
+
# this correctly handles the case where we lose the race to create
# the person and another process beats us to it. (If the record
# does not exist, g_o_c invokes _create_object_from_params which
if not is_comment and (diff or pull_url): # patches or pull requests
# we delay the saving until we know we have a patch.
- author = get_or_create_author(mail)
+ author = get_or_create_author(mail, project)
delegate = find_delegate_by_header(mail)
if not delegate and diff:
is_cover_letter = True
if is_cover_letter:
- author = get_or_create_author(mail)
+ author = get_or_create_author(mail, project)
# we don't use 'find_series' here as a cover letter will
# always be the first item in a thread, thus the references
if not submission:
return
- author = get_or_create_author(mail)
+ author = get_or_create_author(mail, project)
try:
comment = Comment.objects.create(
patch.url_msgid]))
self.assertContains(response, from_email)
+ def test_dmarc_from_header(self):
+ """Validate 'From' header is rewritten correctly when DMARC-munged.
+
+ Test that when an email with a DMARC-munged From header is processed,
+ the From header will be unmunged and the munged address will be saved
+ as 'X-Patchwork-Original-From'.
+ """
+ orig_from_header = 'Person via List <list@example.com>'
+ rewritten_from_header = 'Person <person@example.com>'
+ project = create_project(listemail='list@example.com')
+ person = create_person(name='Person', email='person@example.com')
+ patch = create_patch(project=project,
+ headers='From: ' + orig_from_header,
+ submitter=person)
+ response = self.client.get(
+ reverse('patch-mbox', args=[patch.project.linkname,
+ patch.url_msgid]))
+ mail = email.message_from_string(response.content.decode())
+ self.assertEqual(mail['From'], rewritten_from_header)
+ self.assertEqual(mail['X-Patchwork-Original-From'], orig_from_header)
+
def test_date_header(self):
patch = create_patch()
response = self.client.get(
"""
@staticmethod
- def _create_email(from_header):
+ def _create_email(from_header, reply_tos=None, ccs=None,
+ x_original_from=None):
mail = 'Message-Id: %s\n' % make_msgid() + \
- 'From: %s\n' % from_header + \
- 'Subject: Tests\n\n'\
- 'test\n'
+ 'From: %s\n' % from_header
+
+ if reply_tos:
+ mail += 'Reply-To: %s\n' % ', '.join(reply_tos)
+
+ if ccs:
+ mail += 'Cc: %s\n' % ', '.join(ccs)
+
+ if x_original_from:
+ mail += 'X-Original-From: %s\n' % x_original_from
+
+ mail += 'Subject: Tests\n\n'\
+ 'test\n'
+
return message_from_string(mail)
def test_existing_sender(self):
self.assertEqual(person_b._state.adding, False)
self.assertEqual(person_b.id, person_a.id)
+ def test_mailman_dmarc_munging(self):
+ project = create_project()
+ real_sender = 'Existing Sender <existing@example.com>'
+ munged_sender = 'Existing Sender via List <{}>'.format(
+ project.listemail)
+ other_email = 'Other Person <other@example.com>'
+
+ # Unmunged author
+ mail = self._create_email(real_sender)
+ person_a = get_or_create_author(mail, project)
+ person_a.save()
+
+ # Single Reply-To
+ mail = self._create_email(munged_sender, [real_sender])
+ person_b = get_or_create_author(mail, project)
+ self.assertEqual(person_b._state.adding, False)
+ self.assertEqual(person_b.id, person_a.id)
+
+ # Single Cc
+ mail = self._create_email(munged_sender, [], [real_sender])
+ person_b = get_or_create_author(mail, project)
+ self.assertEqual(person_b._state.adding, False)
+ self.assertEqual(person_b.id, person_a.id)
+
+ # Multiple Reply-Tos and Ccs
+ mail = self._create_email(munged_sender, [other_email, real_sender],
+ [other_email, other_email])
+ person_b = get_or_create_author(mail, project)
+ self.assertEqual(person_b._state.adding, False)
+ self.assertEqual(person_b.id, person_a.id)
+
+ def test_google_dmarc_munging(self):
+ project = create_project()
+ real_sender = 'Existing Sender <existing@example.com>'
+ munged_sender = "'Existing Sender' via List <{}>".format(
+ project.listemail)
+
+ # Unmunged author
+ mail = self._create_email(real_sender)
+ person_a = get_or_create_author(mail, project)
+ person_a.save()
+
+ # X-Original-From header
+ mail = self._create_email(munged_sender, None, None, real_sender)
+ person_b = get_or_create_author(mail, project)
+ self.assertEqual(person_b._state.adding, False)
+ self.assertEqual(person_b.id, person_a.id)
+
class SeriesCorrelationTest(TestCase):
"""Validate correct behavior of find_series."""
from patchwork.models import Comment
from patchwork.models import Patch
+from patchwork.parser import split_from_header
if settings.ENABLE_REST_API:
from rest_framework.authtoken.models import Token
# [1] https://tools.ietf.org/html/rfc1847
if key == 'Content-Type' and val == 'multipart/signed':
continue
+
+ if key == 'From':
+ name, addr = split_from_header(val)
+ if addr == submission.project.listemail:
+ # If From: is the list address (typically DMARC munging), then
+ # use the submitter details (which are cleaned up in the
+ # parser) in the From: field so that the patch author details
+ # are correct when applied with git am.
+ mail['X-Patchwork-Original-From'] = val
+ val = mail['X-Patchwork-Submitter']
+
mail[key] = val
if 'Date' not in mail:
--- /dev/null
+---
+fixes:
+ - |
+ To avoid triggering spam filters due to failed signature validation, many
+ mailing lists mangle the From header to change the From address to be the
+ address of the list, typically where the sender's domain has a strict DMARC
+ policy enabled. This leads to incorrect senders being recorded. We now try
+ to unmangle the From header using the X-Original-From or Reply-To headers,
+ as used by Google Groups and Mailman respectively.