From: Stephen Finucane Date: Tue, 12 Jul 2016 13:59:39 +0000 (+0100) Subject: parser: Move hash_diff and extract_tags X-Git-Tag: v2.0.0-rc1~267 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=69112419b02d4a57530ec65f8ee6145d4fb11988;p=thirdparty%2Fpatchwork.git parser: Move hash_diff and extract_tags These functions are only used in models.Patch. Add them as static methods. Signed-off-by: Stephen Finucane Reviewed-by: Andy Doan --- diff --git a/patchwork/models.py b/patchwork/models.py index ee2ee63d..5043c9a0 100644 --- a/patchwork/models.py +++ b/patchwork/models.py @@ -22,6 +22,7 @@ from __future__ import absolute_import from collections import Counter, OrderedDict import datetime +import hashlib import random import re @@ -35,7 +36,6 @@ from django.utils.functional import cached_property from django.utils.six.moves import filter from patchwork.fields import HashField -from patchwork.parser import extract_tags, hash_diff @python_2_unicode_compatible @@ -337,6 +337,64 @@ class Patch(Submission): objects = PatchManager() + @staticmethod + def extract_tags(content, tags): + counts = Counter() + + for tag in tags: + regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE) + counts[tag] = len(regex.findall(content)) + + return counts + + @staticmethod + def hash_diff(diff): + """Generate a hash from a diff.""" + hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') + filename_re = re.compile(r'^(---|\+\+\+) (\S+)') + + # normalise spaces + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' + + prefixes = ['-', '+', ' '] + hash = hashlib.sha1() + + for line in diff.split('\n'): + if len(line) <= 0: + continue + + hunk_match = hunk_re.match(line) + filename_match = filename_re.match(line) + + if filename_match: + # normalise -p1 top-directories + if filename_match.group(1) == '---': + filename = 'a/' + else: + filename = 'b/' + filename += '/'.join(filename_match.group(2).split('/')[1:]) + + line = filename_match.group(1) + ' ' + filename + elif hunk_match: + # remove line numbers, but leave line counts + def fn(x): + if not x: + return 1 + return int(x) + line_nos = list(map(fn, hunk_match.groups())) + line = '@@ -%d +%d @@' % tuple(line_nos) + elif line[0] in prefixes: + # if we have a +, - or context line, leave as-is + pass + else: + # other lines are ignored + continue + + hash.update((line + '\n').encode('utf-8')) + + return hash + def _set_tag(self, tag, count): if count == 0: self.patchtag_set.filter(tag=tag).delete() @@ -351,10 +409,10 @@ class Patch(Submission): counter = Counter() if self.content: - counter += extract_tags(self.content, tags) + counter += self.extract_tags(self.content, tags) for comment in self.comments.all(): - counter = counter + extract_tags(comment.content, tags) + counter = counter + self.extract_tags(comment.content, tags) for tag in tags: self._set_tag(tag, counter[tag]) @@ -364,7 +422,7 @@ class Patch(Submission): self.state = get_default_initial_patch_state() if self.hash is None and self.diff is not None: - self.hash = hash_diff(self.diff).hexdigest() + self.hash = self.hash_diff(self.diff).hexdigest() super(Patch, self).save(**kwargs) diff --git a/patchwork/parser.py b/patchwork/parser.py index 61a6fd98..c9c058d7 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -19,8 +19,6 @@ # along with Patchwork; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -from collections import Counter -import hashlib import re from django.utils.six.moves import map @@ -183,61 +181,6 @@ def parse_patch(content): return patchbuf, commentbuf -def hash_diff(diff): - """Generate a hash from a diff.""" - # normalise spaces - diff = diff.replace('\r', '') - diff = diff.strip() + '\n' - - prefixes = ['-', '+', ' '] - hash = hashlib.sha1() - - for line in diff.split('\n'): - if len(line) <= 0: - continue - - hunk_match = _hunk_re.match(line) - filename_match = _filename_re.match(line) - - if filename_match: - # normalise -p1 top-directories - if filename_match.group(1) == '---': - filename = 'a/' - else: - filename = 'b/' - filename += '/'.join(filename_match.group(2).split('/')[1:]) - - line = filename_match.group(1) + ' ' + filename - elif hunk_match: - # remove line numbers, but leave line counts - def fn(x): - if not x: - return 1 - return int(x) - line_nos = list(map(fn, hunk_match.groups())) - line = '@@ -%d +%d @@' % tuple(line_nos) - elif line[0] in prefixes: - # if we have a +, - or context line, leave as-is - pass - else: - # other lines are ignored - continue - - hash.update((line + '\n').encode('utf-8')) - - return hash - - -def extract_tags(content, tags): - counts = Counter() - - for tag in tags: - regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE) - counts[tag] = len(regex.findall(content)) - - return counts - - def find_filenames(diff): """Find files changes in a given diff.""" # normalise spaces diff --git a/patchwork/tests/test_tags.py b/patchwork/tests/test_tags.py index e7d7fadf..7d200e3e 100644 --- a/patchwork/tests/test_tags.py +++ b/patchwork/tests/test_tags.py @@ -23,7 +23,6 @@ from django.test import TransactionTestCase from patchwork.models import Patch from patchwork.models import PatchTag from patchwork.models import Tag -from patchwork.parser import extract_tags from patchwork.tests.utils import create_comment from patchwork.tests.utils import create_patch @@ -35,7 +34,7 @@ class ExtractTagsTest(TestCase): name_email = 'test name <' + email + '>' def assertTagsEqual(self, str, acks, reviews, tests): - counts = extract_tags(str, Tag.objects.all()) + counts = Patch.extract_tags(str, Tag.objects.all()) self.assertEqual((acks, reviews, tests), (counts[Tag.objects.get(name='Acked-by')], counts[Tag.objects.get(name='Reviewed-by')],