1 #! /usr/bin/env python3
3 # Copyright OpenEmbedded Contributors
5 # SPDX-License-Identifier: GPL-2.0-only
18 # - option to just list all broken files
20 # - validate signed-off-by
# Upstream-Status values the checker recognises; tag values are lower-cased
# before being compared against this tuple.
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
25 # Whether the patch has an Upstream-Status or not
26 missing_upstream_status
= False
# If the Upstream-Status tag is malformed, the tag text as written in the patch (otherwise None)
28 malformed_upstream_status
= None
29 # If the Upstream-Status value is unknown (boolean)
30 unknown_upstream_status
= False
31 # The upstream status value (Pending, etc)
32 upstream_status
= None
33 # Whether the patch has a Signed-off-by or not
# If the Signed-off-by tag is malformed, the tag text as written in the patch
37 # The Signed-off-by tag value
39 # Whether a patch looks like a CVE but doesn't have a CVE tag
def blame_patch(patch):
    """
    Return the history of a patch file as a list of strings.

    patch: filename of the patch, located inside a git repository.

    Each entry has the form "commit summary (author name <author email>)"
    and describes a commit that added the file, following renames.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    # Run git from the directory that holds the patch so the lookup works even
    # when the script is started outside the repository being scanned. The
    # path is made absolute first so it still resolves from the new cwd.
    patch = os.path.abspath(patch)
    command = ("git", "log",
               "--follow", "--find-renames", "--diff-filter=A",
               "--format=%s (%aN <%aE>)",
               "--", patch)
    output = subprocess.check_output(command, cwd=os.path.dirname(patch))
    return output.decode("utf-8").splitlines()
def patchreview(patches):
    """
    Check each patch file for Signed-off-by, Upstream-Status and CVE tags.

    patches: iterable of patch filenames to read.

    Returns a dict mapping each filename to a result object whose attributes
    record which tags were missing, malformed or had an unknown value.
    """

    # General pattern: start of line, optional whitespace, tag whose words are
    # joined by a hyphen, underscore or space, maybe a colon, some whitespace,
    # then the value, all case insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:
        # NOTE(review): the result class is defined outside the reviewed
        # excerpt; it is presumably called Result - confirm.
        result = Result()
        results[patch] = result

        # Non-ASCII bytes are dropped rather than decoded; the context manager
        # makes sure the file is closed before the next patch is opened.
        with open(patch, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # Any spelling other than the canonical one is recorded verbatim
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            # Status values are compared in lower case
            value = match.group(2).lower()
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which look like CVEs have CVE tags: a CVE
        # identifier in the filename or the content requires a "CVE:" line.
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
def analyse(results, want_blame=False, verbose=True):
    """
    Print the warnings for each patch followed by summary statistics.

    results: mapping of patch filename to its review result
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so blame is only shown when both are set
    show_blame = want_blame and verbose

    total_patches = 0
    tally = {
        "missing_sob": 0,
        "malformed_sob": 0,
        "missing_cve": 0,
        "missing_status": 0,
        "malformed_status": 0,
        "pending": 0,
    }

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        # Build statistics
        if r.missing_sob:
            tally["missing_sob"] += 1
        if r.malformed_sob:
            tally["malformed_sob"] += 1
        if r.missing_upstream_status:
            tally["missing_status"] += 1
        if r.malformed_upstream_status or r.unknown_upstream_status:
            tally["malformed_status"] += 1
            # Count patches with no status as pending
            # NOTE(review): a malformed tag whose value is "pending" is also
            # counted by the check below - confirm that is intended.
            tally["pending"] += 1
        if r.missing_cve:
            tally["missing_cve"] += 1
        if r.upstream_status == "pending":
            tally["pending"] += 1

        # Collect warnings in the order they are displayed
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        # Any warning at all makes the patch eligible for blame output
        if show_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Count plus its share of all patches; no share exists for zero patches
        if total_patches == 0:
            return "N/A"
        return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))

    if verbose:
        print()

    summary_order = ("missing_sob", "malformed_sob", "missing_cve",
                     "missing_status", "malformed_status", "pending")
    summary_values = (total_patches,) + tuple(percent(tally[key]) for key in summary_order)
    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % summary_values)
def histogram(results):
    """
    Print a bar chart of how many patches carry each Upstream-Status value.

    results: mapping of patch filename to its review result; only the
    upstream_status attribute of each result is read.

    One line is printed per distinct status, in first-seen order. The bar has
    one "#" per percent of all patches (rounded up), followed by the raw count.
    Patches without a status are grouped under "No status".
    """
    # Standard library only: Counter keeps first-seen order just like the
    # third-party helper it replaces, so the output is unchanged.
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
def find_layers(candidate):
    """
    Return the absolute paths of the layers found at candidate.

    candidate: pathlib.Path to either a layer or a repository of layers.
    """
    # A directory holding conf/layer.conf is itself a layer (eg meta-intel).
    # Anything else is treated as a repository that contains layers
    # (eg meta-arm), which are assumed to be the sub-directories named "meta"
    # or "meta-*".
    layer_conf = candidate / "conf" / "layer.conf"
    if layer_conf.exists():
        return [candidate.absolute()]

    found = []
    for entry in candidate.iterdir():
        if not entry.is_dir():
            continue
        if entry.name == "meta" or entry.name.startswith("meta-"):
            found.append(entry.absolute())
    return found
220 # TODO these don't actually handle dynamic-layers/
def gather_patches(layers):
    """
    Return the .patch and .diff files that git lists under recipes-* in each layer.

    layers: iterable of layer directories, each inside a git repository.

    Returns a list of paths, each formed by joining the layer directory with
    the filename reported by git.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    patches = []
    for directory in layers:
        listing = subprocess.check_output(
            ("git", "-C", directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff"),
            universal_newlines=True)
        # git prints one path per line; split on line boundaries only, so a
        # filename containing spaces is not broken into several bogus paths.
        patches += [os.path.join(directory, f) for f in listing.splitlines()]
    return patches
def count_recipes(layers):
    """
    Return how many .bb files git lists under recipes-* across all layers.

    layers: iterable of layer directories, each inside a git repository.
    """
    def recipes_in(directory):
        # One output line per matching file
        listing = subprocess.check_output(["git", "-C", directory, "ls-files", "recipes-*/**/*.bb"], universal_newlines=True)
        return len(listing.splitlines())

    return sum(recipes_in(directory) for directory in layers)
if __name__ == "__main__":
    # Command-line entry point: scan the given layer (or repository of layers),
    # print the review, and optionally append a statistics row to a JSON file.
    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", type=pathlib.Path, metavar="DIRECTORY", help="directory to scan (layer, or repository of layers)")
    args = parser.parse_args()

    layers = find_layers(args.directory)
    print(f"Found layers {' '.join((d.name for d in layers))}")
    patches = gather_patches(layers)
    results = patchreview(patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    # NOTE(review): the guard, the empty-list fallback and the append below
    # were not visible in the reviewed excerpt and are reconstructed from the
    # surrounding statements - confirm against the full file.
    if args.json:
        # Extend the existing history if the file is already there
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        # A Counter lets the per-status tallies below start from zero
        row = collections.Counter()
        row["total"] = len(results)
        # Commit date of HEAD, formatted by git with the strftime pattern %s
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"], universal_newlines=True).strip()
        row["commit"] = subprocess.check_output(["git", "-C", args.directory, "rev-parse", "HEAD"], universal_newlines=True).strip()
        row['commit_count'] = subprocess.check_output(["git", "-C", args.directory, "rev-list", "--count", "HEAD"], universal_newlines=True).strip()
        row['recipe_count'] = count_recipes(layers)

        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the JSON is fully written before exit
        with open(args.json, "w") as f:
            json.dump(data, f, sort_keys=True, indent="\t")