From: Stephen Finucane Date: Fri, 13 Nov 2015 03:32:51 +0000 (+0000) Subject: bin/parsemail: Support user-provided list ID X-Git-Tag: v1.1.0~101 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=545c062e794715c30e1e77245a70303ac28a9da3;p=thirdparty%2Fpatchwork.git bin/parsemail: Support user-provided list ID Sometimes mails won't contain the headers necessary to extract a mailing list's ID (for example: mails downloaded from Mailman archives). However, should the user already know the correct mailing list ID, then this extraction is not necessary. Allow the user to provide a mailing list ID via the command line. Signed-off-by: Stephen Finucane --- diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py index e05f0365..0562a45a 100755 --- a/patchwork/bin/parsemail.py +++ b/patchwork/bin/parsemail.py @@ -19,6 +19,7 @@ # along with Patchwork; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import argparse import codecs import datetime from email import message_from_file @@ -59,7 +60,17 @@ def clean_header(header): return normalise_space(u' '.join(fragments)) -def find_project(mail): +def find_project_by_id(list_id): + """Find a `project` object with given `list_id`.""" + project = None + try: + project = Project.objects.get(listid=list_id) + except Project.DoesNotExist: + pass + return project + + +def find_project_by_header(mail): project = None listid_res = [re.compile(r'.*<([^>]+)>.*', re.S), re.compile(r'^([\S]+)$', re.S)] @@ -77,15 +88,12 @@ def find_project(mail): listid = match.group(1) - try: - project = Project.objects.get(listid=listid) + project = find_project_by_id(listid) + if project: break - except Project.DoesNotExist: - pass return project - def find_author(mail): from_header = clean_header(mail.get('From')) @@ -365,8 +373,16 @@ def get_delegate(delegate_email): return None -def parse_mail(mail): - """Parse a mail and add to the database.""" +def 
parse_mail(mail, list_id=None): + """Parse a mail and add to the database. + + Args: + mail (`mbox.Mail`): Mail to parse and add. + list_id (str): Mailing list ID + + Returns: + None + """ # some basic sanity checks if 'From' not in mail: return 0 @@ -381,7 +397,10 @@ def parse_mail(mail): if hint == 'ignore': return 0 - project = find_project(mail) + if list_id: + project = find_project_by_id(list_id) + else: + project = find_project_by_header(mail) if project is None: print("no project found") @@ -451,16 +470,28 @@ def setup_error_handler(): def main(args): django.setup() logger = setup_error_handler() - mail = message_from_file(sys.stdin) + parser = argparse.ArgumentParser() + + parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), + default=sys.stdin, help='input mbox file (a filename ' + 'or stdin)') + + group = parser.add_argument_group('Mail parsing configuration') + group.add_argument('--list-id', help='mailing list ID. If not supplied ' + 'this will be extracted from the mail headers.') + + args = vars(parser.parse_args()) + + mail = message_from_file(args['infile']) try: - return parse_mail(mail) + return parse_mail(mail, args['list_id']) except: if logger: logger.exception('Error when parsing incoming email', extra={ 'mail': mail.as_string(), }) raise - + return parse_mail(mail, args['list_id']) if __name__ == '__main__': sys.exit(main(sys.argv)) diff --git a/patchwork/tests/test_patchparser.py b/patchwork/tests/test_patchparser.py index 2b8f9d2f..77668e5e 100644 --- a/patchwork/tests/test_patchparser.py +++ b/patchwork/tests/test_patchparser.py @@ -34,8 +34,9 @@ class PatchTest(TestCase): default_subject = defaults.subject project = defaults.project -from patchwork.bin.parsemail import find_content, find_author, find_project, \ - parse_mail, split_prefixes, clean_subject +from patchwork.bin.parsemail import (find_content, find_author, + find_project_by_header, parse_mail, + split_prefixes, clean_subject) class InlinePatchTest(PatchTest): 
patch_filename = '0001-add-line.patch' @@ -321,25 +322,25 @@ class ListIdHeaderTest(TestCase): def testNoListId(self): email = MIMEText('') - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, None) def testBlankListId(self): email = MIMEText('') email['List-Id'] = '' - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, None) def testWhitespaceListId(self): email = MIMEText('') email['List-Id'] = ' ' - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, None) def testSubstringListId(self): email = MIMEText('') email['List-Id'] = 'example.com' - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, None) def testShortListId(self): @@ -347,13 +348,13 @@ class ListIdHeaderTest(TestCase): is only the list ID itself (without enclosing angle-brackets). """ email = MIMEText('') email['List-Id'] = self.project.listid - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, self.project) def testLongListId(self): email = MIMEText('') email['List-Id'] = 'Test text <%s>' % self.project.listid - project = find_project(email) + project = find_project_by_header(email) self.assertEquals(project, self.project) def tearDown(self):