trivial: Cleanup of 'parser'

author Stephen Finucane <stephen.finucane@intel.com>

Sun, 10 Apr 2016 12:45:13 +0000 (13:45 +0100)

committer Stephen Finucane <stephenfinucane@hotmail.com>

Fri, 2 Sep 2016 19:04:13 +0000 (20:04 +0100)
author Stephen Finucane <stephen.finucane@intel.com>
Sun, 10 Apr 2016 12:45:13 +0000 (13:45 +0100)
committer Stephen Finucane <stephenfinucane@hotmail.com>
Fri, 2 Sep 2016 19:04:13 +0000 (20:04 +0100)
diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py

index 48f809fb8d337847cb03933635c74bfd977ce5eb..89130764eb72f40e1d5f049ffccb46e3bb95439a 100755 (executable)
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -44,7 +44,7 @@ from django.utils.six.moves import map
  from patchwork.models import (Patch, Project, Person, Comment, State,
                                DelegationRule, Submission, CoverLetter,
                                get_default_initial_patch_state)
-from patchwork.parser import parse_patch, patch_get_filenames
+from patchwork.parser import parse_patch, find_filenames
  
  LOGGER = logging.getLogger(__name__)
  
@@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None):
  
          delegate = find_delegate(mail)
          if not delegate and diff:
-            filenames = patch_get_filenames(diff)
+            filenames = find_filenames(diff)
              delegate = auto_delegate(project, filenames)
  
          patch = Patch(
diff --git a/patchwork/models.py b/patchwork/models.py

index 521b20c222b31e0176b945928f6c0d5a32bff005..ee2ee63d2daceeb5d4c3585bc9fe3db2f3318cc7 100644 (file)
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -35,7 +35,7 @@ from django.utils.functional import cached_property
  from django.utils.six.moves import filter
  
  from patchwork.fields import HashField
-from patchwork.parser import extract_tags, hash_patch
+from patchwork.parser import extract_tags, hash_diff
  
  
  @python_2_unicode_compatible
@@ -364,7 +364,7 @@ class Patch(Submission):
              self.state = get_default_initial_patch_state()
  
          if self.hash is None and self.diff is not None:
-            self.hash = hash_patch(self.diff).hexdigest()
+            self.hash = hash_diff(self.diff).hexdigest()
  
          super(Patch, self).save(**kwargs)
  
diff --git a/patchwork/parser.py b/patchwork/parser.py

index 8bf9b21371786e04c29f572ff59fab1c333465ed..f173431c46df81d0c2f09d95b06465ebe4cd155b 100644 (file)
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  _filename_re = re.compile('^(---|\+\+\+) (\S+)')
  
  
-def parse_patch(text):
+def parse_patch(content):
+    """Split a mail's contents into a diff and comment.
+
+    This is a state machine that takes a patch, generally in UNIX mbox
+    format, and splits it into the component comments and diff.
+
+    Args:
+        content: The patch to be split
+
+    Returns:
+        A tuple containing the diff and comment. Either one or both of
+        these can be empty.
+
+    Raises:
+        Exception: The state machine transitioned to an invalid state.
+    """
      patchbuf = ''
      commentbuf = ''
      buf = ''
@@ -68,7 +83,7 @@ def parse_patch(text):
      lc = (0, 0)
      hunk = 0
  
-    for line in text.split('\n'):
+    for line in content.split('\n'):
          line += '\n'
  
          if state == 0:
@@ -76,14 +91,11 @@ def parse_patch(text):
                      or line.startswith('Index: '):
                  state = 1
                  buf += line
-
              elif line.startswith('--- '):
                  state = 2
                  buf += line
-
              else:
                  commentbuf += line
-
          elif state == 1:
              buf += line
              if line.startswith('--- '):
@@ -91,25 +103,20 @@ def parse_patch(text):
  
              if line.startswith(('rename from ', 'rename to ')):
                  state = 6
-
          elif state == 2:
              if line.startswith('+++ '):
                  state = 3
                  buf += line
-
              elif hunk:
                  state = 1
                  buf += line
-
              else:
                  state = 0
                  commentbuf += buf + line
                  buf = ''
-
          elif state == 3:
              match = _hunk_re.match(line)
              if match:
-
                  def fn(x):
                      if not x:
                          return 1
@@ -120,26 +127,21 @@ def parse_patch(text):
                  state = 4
                  patchbuf += buf + line
                  buf = ''
-
              elif line.startswith('--- '):
                  patchbuf += buf + line
                  buf = ''
                  state = 2
-
              elif hunk and line.startswith('\ No newline at end of file'):
                  # If we had a hunk and now we see this, it's part of the patch,
                  # and we're still expecting another @@ line.
                  patchbuf += line
-
              elif hunk:
                  state = 1
                  buf += line
-
              else:
                  state = 0
                  commentbuf += buf + line
                  buf = ''
-
          elif state == 4 or state == 5:
              if line.startswith('-'):
                  lc[0] -= 1
@@ -159,21 +161,17 @@ def parse_patch(text):
                  hunk += 1
              else:
                  state = 5
-
          elif state == 6:
              if line.startswith(('rename to ', 'rename from ')):
                  patchbuf += buf + line
                  buf = ''
-
              elif line.startswith('--- '):
                  patchbuf += buf + line
                  buf = ''
                  state = 2
-
              else:
                  buf += line
                  state = 1
-
          else:
              raise Exception("Unknown state %d! (line '%s')" % (state, line))
  
@@ -185,19 +183,19 @@ def parse_patch(text):
      if commentbuf == '':
          commentbuf = None
  
-    return (patchbuf, commentbuf)
+    return patchbuf, commentbuf
  
  
-def hash_patch(str):
+def hash_diff(diff):
+    """Generate a hash from a diff."""
      # normalise spaces
-    str = str.replace('\r', '')
-    str = str.strip() + '\n'
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
  
      prefixes = ['-', '+', ' ']
      hash = hashlib.sha1()
  
-    for line in str.split('\n'):
-
+    for line in diff.split('\n'):
          if len(line) <= 0:
              continue
  
@@ -213,7 +211,6 @@ def hash_patch(str):
              filename += '/'.join(filename_match.group(2).split('/')[1:])
  
              line = filename_match.group(1) + ' ' + filename
-
          elif hunk_match:
              # remove line numbers, but leave line counts
              def fn(x):
@@ -222,11 +219,9 @@ def hash_patch(str):
                  return int(x)
              line_nos = list(map(fn, hunk_match.groups()))
              line = '@@ -%d +%d @@' % tuple(line_nos)
-
          elif line[0] in prefixes:
              # if we have a +, - or context line, leave as-is
              pass
-
          else:
              # other lines are ignored
              continue
@@ -246,15 +241,15 @@ def extract_tags(content, tags):
      return counts
  
  
-def patch_get_filenames(str):
+def find_filenames(diff):
+    """Find files changed in a given diff."""
      # normalise spaces
-    str = str.replace('\r', '')
-    str = str.strip() + '\n'
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
  
      filenames = {}
  
-    for line in str.split('\n'):
-
+    for line in diff.split('\n'):
          if len(line) <= 0:
              continue
  
@@ -291,21 +286,20 @@ def main(args):
  
      # decode from (assumed) UTF-8
      content = sys.stdin.read().decode('utf-8')
-
-    (patch, comment) = parse_patch(content)
+    patch, comment = parse_patch(content)
  
      if options.print_hash and patch:
-        print(hash_patch(patch).hexdigest())
+        print(hash_diff(patch).hexdigest())
  
      if options.print_patch and patch:
-        print("Patch: ------\n" + patch)
+        print('Patch: ------\n' + patch)
  
      if options.print_comment and comment:
-        print("Comment: ----\n" + comment)
+        print('Comment: ----\n' + comment)
  
      if options.print_filenames:
-        filenames = patch_get_filenames(content)
-        print("File names: ----\n" + '\n'.join(filenames))
+        filenames = find_filenames(content)
+        print('File names: ----\n' + '\n'.join(filenames))
  
  if __name__ == '__main__':
      import sys
author	Stephen Finucane <stephen.finucane@intel.com>
	Sun, 10 Apr 2016 12:45:13 +0000 (13:45 +0100)
committer	Stephen Finucane <stephenfinucane@hotmail.com>
	Fri, 2 Sep 2016 19:04:13 +0000 (20:04 +0100)
patchwork/bin/parsemail.py		patch \| blob \| blame \| history
patchwork/models.py		patch \| blob \| blame \| history
patchwork/parser.py		patch \| blob \| blame \| history