From: Stephen Finucane Date: Sun, 10 Apr 2016 12:45:13 +0000 (+0100) Subject: trivial: Cleanup of 'parser' X-Git-Tag: v2.0.0-rc1~269 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1d176e75333ec9ff255eada8c319305ca287365;p=thirdparty%2Fpatchwork.git trivial: Cleanup of 'parser' Do some cleanup of the file by removing excess whitespace, adding some documentation, removing shadowing of keywords and renaming some functions to more accurately reflect their purpose. Signed-off-by: Stephen Finucane Reviewed-by: Andy Doan --- diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py index 48f809fb..89130764 100755 --- a/patchwork/bin/parsemail.py +++ b/patchwork/bin/parsemail.py @@ -44,7 +44,7 @@ from django.utils.six.moves import map from patchwork.models import (Patch, Project, Person, Comment, State, DelegationRule, Submission, CoverLetter, get_default_initial_patch_state) -from patchwork.parser import parse_patch, patch_get_filenames +from patchwork.parser import parse_patch, find_filenames LOGGER = logging.getLogger(__name__) @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None): delegate = find_delegate(mail) if not delegate and diff: - filenames = patch_get_filenames(diff) + filenames = find_filenames(diff) delegate = auto_delegate(project, filenames) patch = Patch( diff --git a/patchwork/models.py b/patchwork/models.py index 521b20c2..ee2ee63d 100644 --- a/patchwork/models.py +++ b/patchwork/models.py @@ -35,7 +35,7 @@ from django.utils.functional import cached_property from django.utils.six.moves import filter from patchwork.fields import HashField -from patchwork.parser import extract_tags, hash_patch +from patchwork.parser import extract_tags, hash_diff @python_2_unicode_compatible @@ -364,7 +364,7 @@ class Patch(Submission): self.state = get_default_initial_patch_state() if self.hash is None and self.diff is not None: - self.hash = hash_patch(self.diff).hexdigest() + self.hash = hash_diff(self.diff).hexdigest() super(Patch, self).save(**kwargs) diff --git a/patchwork/parser.py b/patchwork/parser.py index 8bf9b213..f173431c 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile('^(---|\+\+\+) (\S+)') -def parse_patch(text): +def parse_patch(content): + """Split a mail's contents into a diff and comment. + + This is a state machine that takes a patch, generally in UNIX mbox + format, and splits it into the component comments and diff. + + Args: + patch: The patch to be split + + Returns: + A tuple containing the diff and comment. Either one or both of + these can be empty. + + Raises: + Exception: The state machine transitioned to an invalid state. + """ patchbuf = '' commentbuf = '' buf = '' @@ -68,7 +83,7 @@ def parse_patch(text): lc = (0, 0) hunk = 0 - for line in text.split('\n'): + for line in content.split('\n'): line += '\n' if state == 0: @@ -76,14 +91,11 @@ def parse_patch(text): or line.startswith('Index: '): state = 1 buf += line - elif line.startswith('--- '): state = 2 buf += line - else: commentbuf += line - elif state == 1: buf += line if line.startswith('--- '): @@ -91,25 +103,20 @@ def parse_patch(text): if line.startswith(('rename from ', 'rename to ')): state = 6 - elif state == 2: if line.startswith('+++ '): state = 3 buf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 3: match = _hunk_re.match(line) if match: - def fn(x): if not x: return 1 @@ -120,26 +127,21 @@ def parse_patch(text): state = 4 patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - elif hunk and line.startswith('\ No newline at end of file'): # If we had a hunk and now we see this, it's part of the patch, # and we're still expecting another @@ line. patchbuf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 4 or state == 5: if line.startswith('-'): lc[0] -= 1 @@ -159,21 +161,17 @@ def parse_patch(text): hunk += 1 else: state = 5 - elif state == 6: if line.startswith(('rename to ', 'rename from ')): patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - else: buf += line state = 1 - else: raise Exception("Unknown state %d! (line '%s')" % (state, line)) @@ -185,19 +183,19 @@ def parse_patch(text): if commentbuf == '': commentbuf = None - return (patchbuf, commentbuf) + return patchbuf, commentbuf -def hash_patch(str): +def hash_diff(diff): + """Generate a hash from a diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' prefixes = ['-', '+', ' '] hash = hashlib.sha1() - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -213,7 +211,6 @@ def hash_patch(str): filename += '/'.join(filename_match.group(2).split('/')[1:]) line = filename_match.group(1) + ' ' + filename - elif hunk_match: # remove line numbers, but leave line counts def fn(x): @@ -222,11 +219,9 @@ def hash_patch(str): return int(x) line_nos = list(map(fn, hunk_match.groups())) line = '@@ -%d +%d @@' % tuple(line_nos) - elif line[0] in prefixes: # if we have a +, - or context line, leave as-is pass - else: # other lines are ignored continue @@ -246,15 +241,15 @@ def extract_tags(content, tags): return counts -def patch_get_filenames(str): +def find_filenames(diff): + """Find files changes in a given diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' filenames = {} - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -291,21 +286,20 @@ def main(args): # decode from (assumed) UTF-8 content = sys.stdin.read().decode('utf-8') - - (patch, comment) = parse_patch(content) + patch, comment = parse_patch(content) if options.print_hash and patch: - print(hash_patch(patch).hexdigest()) + print(hash_diff(patch).hexdigest()) if options.print_patch and patch: - print("Patch: ------\n" + patch) + print('Patch: ------\n' + patch) if options.print_comment and comment: - print("Comment: ----\n" + comment) + print('Comment: ----\n' + comment) if options.print_filenames: - filenames = patch_get_filenames(content) - print("File names: ----\n" + '\n'.join(filenames)) + filenames = find_filenames(content) + print('File names: ----\n' + '\n'.join(filenames)) if __name__ == '__main__': import sys