scripts/contrib/patchreview.py

   1 #! /usr/bin/env python3
   2 #
   3 # Copyright OpenEmbedded Contributors
   4 #
   5 # SPDX-License-Identifier: GPL-2.0-only
   6 #
   7
   8 import argparse
   9 import collections
  10 import json
  11 import os
  12 import os.path
  13 import pathlib
  14 import re
  15 import subprocess
  16
  17 # TODO
  18 # - option to just list all broken files
  19 # - test suite
  20 # - validate signed-off-by
  21
  22 status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")
  23
  24 class Result:
  25     # Whether the patch has an Upstream-Status or not
  26     missing_upstream_status = False
  27     # If the Upstream-Status tag is malformed in some way (string for bad bit)
  28     malformed_upstream_status = None
  29     # If the Upstream-Status value is unknown (boolean)
  30     unknown_upstream_status = False
  31     # The upstream status value (Pending, etc)
  32     upstream_status = None
  33     # Whether the patch has a Signed-off-by or not
  34     missing_sob = False
  35     # Whether the Signed-off-by tag is malformed in some way
  36     malformed_sob = False
  37     # The Signed-off-by tag value
  38     sob = None
  39     # Whether a patch looks like a CVE but doesn't have a CVE tag
  40     missing_cve = False
  41
  42 def blame_patch(patch):
  43     """
  44     From a patch filename, return a list of "commit summary (author name <author
  45     email>)" strings representing the history.
  46     """
  47     return subprocess.check_output(("git", "log",
  48                                     "--follow", "--find-renames", "--diff-filter=A",
  49                                     "--format=%s (%aN <%aE>)",
  50                                     "--", patch)).decode("utf-8").splitlines()
  51
  52 def patchreview(patches):
  53
  54     # General pattern: start of line, optional whitespace, tag with optional
  55     # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
  56     # insensitive.
  57     sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
  58     status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
  59     cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
  60     cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
  61
  62     results = {}
  63
  64     for patch in patches:
  65
  66         result = Result()
  67         results[patch] = result
  68
  69         content = open(patch, encoding='ascii', errors='ignore').read()
  70
  71         # Find the Signed-off-by tag
  72         match = sob_re.search(content)
  73         if match:
  74             value = match.group(1)
  75             if value != "Signed-off-by:":
  76                 result.malformed_sob = value
  77             result.sob = match.group(2)
  78         else:
  79             result.missing_sob = True
  80
  81
  82         # Find the Upstream-Status tag
  83         match = status_re.search(content)
  84         if match:
  85             value = match.group(1)
  86             if value != "Upstream-Status:":
  87                 result.malformed_upstream_status = value
  88
  89             value = match.group(2).lower()
  90             # TODO: check case
  91             if value not in status_values:
  92                 result.unknown_upstream_status = True
  93             result.upstream_status = value
  94         else:
  95             result.missing_upstream_status = True
  96
  97         # Check that patches which looks like CVEs have CVE tags
  98         if cve_re.search(patch) or cve_re.search(content):
  99             if not cve_tag_re.search(content):
 100                 result.missing_cve = True
 101         # TODO: extract CVE list
 102
 103     return results
 104
 105
 106 def analyse(results, want_blame=False, verbose=True):
 107     """
 108     want_blame: display blame data for each malformed patch
 109     verbose: display per-file results instead of just summary
 110     """
 111
 112     # want_blame requires verbose, so disable blame if we're not verbose
 113     if want_blame and not verbose:
 114         want_blame = False
 115
 116     total_patches = 0
 117     missing_sob = 0
 118     malformed_sob = 0
 119     missing_status = 0
 120     malformed_status = 0
 121     missing_cve = 0
 122     pending_patches = 0
 123
 124     for patch in sorted(results):
 125         r = results[patch]
 126         total_patches += 1
 127         need_blame = False
 128
 129         # Build statistics
 130         if r.missing_sob:
 131             missing_sob += 1
 132         if r.malformed_sob:
 133             malformed_sob += 1
 134         if r.missing_upstream_status:
 135             missing_status += 1
 136         if r.malformed_upstream_status or r.unknown_upstream_status:
 137             malformed_status += 1
 138             # Count patches with no status as pending
 139             pending_patches +=1
 140         if r.missing_cve:
 141             missing_cve += 1
 142         if r.upstream_status == "pending":
 143             pending_patches += 1
 144
 145         # Output warnings
 146         if r.missing_sob:
 147             need_blame = True
 148             if verbose:
 149                 print("Missing Signed-off-by tag (%s)" % patch)
 150         if r.malformed_sob:
 151             need_blame = True
 152             if verbose:
 153                 print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
 154         if r.missing_cve:
 155             need_blame = True
 156             if verbose:
 157                 print("Missing CVE tag (%s)" % patch)
 158         if r.missing_upstream_status:
 159             need_blame = True
 160             if verbose:
 161                 print("Missing Upstream-Status tag (%s)" % patch)
 162         if r.malformed_upstream_status:
 163             need_blame = True
 164             if verbose:
 165                 print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
 166         if r.unknown_upstream_status:
 167             need_blame = True
 168             if verbose:
 169                 print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
 170
 171         if want_blame and need_blame:
 172             print("\n".join(blame_patch(patch)) + "\n")
 173
 174     def percent(num):
 175         try:
 176             return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
 177         except ZeroDivisionError:
 178             return "N/A"
 179
 180     if verbose:
 181         print()
 182
 183     print("""Total patches found: %d
 184 Patches missing Signed-off-by: %s
 185 Patches with malformed Signed-off-by: %s
 186 Patches missing CVE: %s
 187 Patches missing Upstream-Status: %s
 188 Patches with malformed Upstream-Status: %s
 189 Patches in Pending state: %s""" % (total_patches,
 190                                    percent(missing_sob),
 191                                    percent(malformed_sob),
 192                                    percent(missing_cve),
 193                                    percent(missing_status),
 194                                    percent(malformed_status),
 195                                    percent(pending_patches)))
 196
 197
 198
 199 def histogram(results):
 200     from toolz import recipes, dicttoolz
 201     import math
 202
 203     counts = recipes.countby(lambda r: r.upstream_status, results.values())
 204     bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
 205     for k in bars:
 206         print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
 207
 208 def find_layers(candidate):
 209     # candidate can either be the path to a layer directly (eg meta-intel), or a
 210     # repository that contains other layers (meta-arm). We can determine what by
 211     # looking for a conf/layer.conf file. If that file exists then it's a layer,
 212     # otherwise its a repository of layers and we can assume they're called
 213     # meta-*.
 214
 215     if (candidate / "conf" / "layer.conf").exists():
 216         return [candidate.absolute()]
 217     else:
 218         return [d.absolute() for d in candidate.iterdir() if d.is_dir() and (d.name == "meta" or d.name.startswith("meta-"))]
 219
 220 # TODO these don't actually handle dynamic-layers/
 221
 222 def gather_patches(layers):
 223     patches = []
 224     for directory in layers:
 225         filenames = subprocess.check_output(("git", "-C", directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff"), universal_newlines=True).split()
 226         patches += [os.path.join(directory, f) for f in filenames]
 227     return patches
 228
 229 def count_recipes(layers):
 230     count = 0
 231     for directory in layers:
 232         output = subprocess.check_output(["git", "-C", directory, "ls-files", "recipes-*/**/*.bb"], universal_newlines=True)
 233         count += len(output.splitlines())
 234     return count
 235
 236 if __name__ == "__main__":
 237     args = argparse.ArgumentParser(description="Patch Review Tool")
 238     args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
 239     args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
 240     args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
 241     args.add_argument("-j", "--json", help="update JSON")
 242     args.add_argument("directory", type=pathlib.Path, metavar="DIRECTORY", help="directory to scan (layer, or repository of layers)")
 243     args = args.parse_args()
 244
 245     layers = find_layers(args.directory)
 246     print(f"Found layers {' '.join((d.name for d in layers))}")
 247     patches = gather_patches(layers)
 248     results = patchreview(patches)
 249     analyse(results, want_blame=args.blame, verbose=args.verbose)
 250
 251     if args.json:
 252         if os.path.isfile(args.json):
 253             data = json.load(open(args.json))
 254         else:
 255             data = []
 256
 257         row = collections.Counter()
 258         row["total"] = len(results)
 259         row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"], universal_newlines=True).strip()
 260         row["commit"] = subprocess.check_output(["git", "-C", args.directory, "rev-parse", "HEAD"], universal_newlines=True).strip()
 261         row['commit_count'] = subprocess.check_output(["git", "-C", args.directory, "rev-list", "--count", "HEAD"], universal_newlines=True).strip()
 262         row['recipe_count'] = count_recipes(layers)
 263
 264         for r in results.values():
 265             if r.upstream_status in status_values:
 266                 row[r.upstream_status] += 1
 267             if r.malformed_upstream_status or r.missing_upstream_status:
 268                 row['malformed-upstream-status'] += 1
 269             if r.malformed_sob or r.missing_sob:
 270                 row['malformed-sob'] += 1
 271
 272         data.append(row)
 273         json.dump(data, open(args.json, "w"), sort_keys=True, indent="\t")
 274
 275     if args.histogram:
 276         print()
 277         histogram(results)