]>
Commit | Line | Data |
---|---|---|
00243d9a ML |
1 | #!/usr/bin/env python3 |
2 | ||
a945c346 | 3 | # Copyright (C) 2020-2024 Free Software Foundation, Inc. |
00243d9a ML |
4 | # |
5 | # This file is part of GCC. | |
6 | # | |
7 | # GCC is free software; you can redistribute it and/or modify | |
8 | # it under the terms of the GNU General Public License as published by | |
9 | # the Free Software Foundation; either version 3, or (at your option) | |
10 | # any later version. | |
11 | # | |
12 | # GCC is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU General Public License | |
18 | # along with GCC; see the file COPYING. If not, write to | |
19 | # the Free Software Foundation, 51 Franklin Street, Fifth Floor, | |
20 | # Boston, MA 02110-1301, USA. | |
21 | ||
22 | # This script parses a .diff file generated with 'diff -up' or 'diff -cp' | |
23 | # and adds a skeleton ChangeLog file to the file. It does not try to be | |
24 | # too smart when parsing function names, but it produces a reasonable | |
25 | # approximation. | |
26 | # | |
27 | # Author: Martin Liska <mliska@suse.cz> | |
28 | ||
29 | import argparse | |
cf76bbf8 | 30 | import datetime |
18ef76d3 | 31 | import json |
00243d9a ML |
32 | import os |
33 | import re | |
cf76bbf8 | 34 | import subprocess |
00243d9a | 35 | import sys |
757dbb59 | 36 | from itertools import takewhile |
00243d9a | 37 | |
4f85a52c ML |
38 | import requests |
39 | ||
00243d9a ML |
40 | from unidiff import PatchSet |
41 | ||
b838641b ML |
# Maximum width of a generated ChangeLog line; used by
# append_changelog_line to decide whether the description still fits
# on the same line as the file name.
LINE_LIMIT = 100
# Number of columns a tab character is counted as when measuring a line
# against LINE_LIMIT.
TAB_WIDTH = 8

# How the generated ChangeLog is spliced into an existing commit message
# (the --changelog option):
#
# Initial commit:
#   +--------------------------------------------------+
#   | gccrs: Some title                                |
#   |                                                  | This is the "start"
#   | This is some text explaining the commit.         |
#   | There can be several lines.                      |
#   |                                                  |<------------------->
#   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
#   +--------------------------------------------------+
#
# Results in:
#   +--------------------------------------------------+
#   | gccrs: Some title                                |
#   |                                                  |
#   | This is some text explaining the commit.         | This is the "start"
#   | There can be several lines.                      |
#   |                                                  |<------------------->
#   | gcc/rust/ChangeLog:                              |
#   |                                                  | This is the generated
#   |   * some_file (bla):                             | ChangeLog part
#   |   (foo):                                         |
#   |                                                  |<------------------->
#   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
#   +--------------------------------------------------+

# this regex matches the first line of the "end" in the initial commit message:
# case-insensitively, a line starting with 'signed-off-by:',
# 'co-authored-by:' or '#'.
FIRST_LINE_OF_END_RE = re.compile('(?i)^(signed-off-by:|co-authored-by:|#)')
b838641b | 72 | |
# 'PR component/number' preceded by a comment leader ('//', '/*', 'C', 'c',
# '*' or '!') and whitespace; the reference is captured as group 'pr'.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into groups 'comp' and 'num'.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, but for 'DR number' (group 'dr').
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A '{ dg-error' or '{ dg-warning' directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr' (any letter case) at the start or after a non-word character or
# underscore, followed by at least four digits (group 'pr').
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#'; group 1 is the
# whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line starting with '/*'.
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum keyword (group 1), an optional 'GTY(...)' marker
# (group 2) and the type name (group 3).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' / '#undef NAME'; the name is group 2.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# A line starting with 'DEF...(' ; group 1 is the first token after '('.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name (group 1) followed by '(' and any character other than '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' span that contains no nested angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"' as it appears in .md files; the quoted name is group 1.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query template; '%s' is replaced with the bug id and only
# the 'summary' and 'component' fields are requested.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
    'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# Base names reported as 'Regenerate.' instead of listing functions.
# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}
94 | ||
00243d9a ML |
# Description shown by argparse in the command-line help.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# Despite its name, this is the resolved path of this script file itself.
script_folder = os.path.realpath(__file__)
# Two levels up from the script file, i.e. the parent of the directory that
# contains this script; ChangeLog files are looked up relative to it.  The
# command line may override it with --directory.
root = os.path.dirname(os.path.dirname(script_folder))
00243d9a ML |
103 | |
104 | ||
def find_changelog(path):
    """Return the closest ancestor folder of *path* holding a ChangeLog.

    Folders are tested relative to the module-level ``root``, starting at
    the directory of *path* and walking upwards.  The empty string is
    returned when no ancestor contains a ChangeLog file.
    """
    candidate = os.path.dirname(path)
    while candidate != '':
        if os.path.exists(os.path.join(root, candidate, 'ChangeLog')):
            return candidate
        # Not here: retry one directory level higher.
        candidate = os.path.dirname(candidate)
    return ''
114 | ||
115 | ||
def extract_function_name(line):
    """Return the entity named on *line*, or None if nothing is recognised.

    Patterns are tried in this order: struct-like declaration, macro
    definition, 'DEF...' supermacro, and finally a function-like name.
    Lines that start a C comment never yield a name.
    """
    if comment_regex.match(line) is not None:
        return None

    struct_match = struct_regex.search(line)
    if struct_match:
        # Struct declaration: keyword plus name, without any GTY marker.
        keyword, name = struct_match.group(1, 3)
        return keyword + ' ' + name

    # Macro definition (name in group 2), then supermacro (group 1).
    for pattern, group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = pattern.search(line)
        if found:
            return found.group(group)

    call_match = fn_regex.search(line)
    if call_match is None:
        return None
    # Discard template and function parameters.
    stripped = template_and_param_regex.sub('', call_match.group(1))
    return stripped.rstrip()
138 | ||
139 | ||
def try_add_function(functions, line):
    """Record the name extracted from *line* in *functions* (no duplicates).

    Returns True when a name could be extracted, even if it was already
    present in the list; False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
145 | ||
146 | ||
def sort_changelog_files(changed_file):
    """Sort key: (is_added_file, is_removed_file) of *changed_file*.

    With the usual False < True ordering, files that are neither added nor
    removed sort first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return (added, removed)
149 | ||
150 | ||
def get_pr_titles(prs):
    """Fetch the summaries of *prs* from Bugzilla and return them as text.

    Each entry is looked up via ``bugzilla_url`` using the part after the
    last '/'.  When exactly one bug is returned, the entry in *prs* is
    rewritten in place as 'PR <component>/<id>' and a '<entry> - <summary>'
    line is collected (duplicates are skipped).  Returns an empty string
    when nothing was collected.
    """
    titles = []
    for position, entry in enumerate(prs):
        bug_id = entry.split('/')[-1]
        response = requests.get(bugzilla_url % bug_id)
        found = response.json()['bugs']
        if len(found) != 1:
            continue
        bug = found[0]
        prs[position] = 'PR %s/%s' % (bug['component'], bug_id)
        title = '%s - %s\n' % (prs[position], bug['summary'])
        if title not in titles:
            titles.append(title)
    if titles:
        # A trailing empty item makes the joined text end with a newline.
        titles.append('')
    return '\n'.join(titles)
00243d9a | 165 | |
4f85a52c | 166 | |
b838641b ML |
def append_changelog_line(out, relative_path, text):
    """Return *out* extended with a ChangeLog entry for *relative_path*.

    The entry is written as a single '<tab>* path: text' line when it fits
    within LINE_LIMIT columns (the leading tab counting as TAB_WIDTH
    columns); otherwise *text* is moved to its own tab-indented line.
    """
    header = f'\t* {relative_path}:'
    expanded_header = header.replace('\t', ' ' * TAB_WIDTH)
    # +1 accounts for the space separating header and text.
    width = len(expanded_header) + 1 + len(text)
    if width > LINE_LIMIT:
        return out + f'{header}\n' + f'\t{text}\n'
    return out + f'{header} {text}\n'
175 | ||
176 | ||
e370a248 ML |
def get_rel_path_if_prefixed(path, folder):
    """Return *path* relative to *folder* when it lies inside that folder.

    The prefix is only stripped at a directory boundary, so a sibling such
    as 'gccrs/foo.c' is not mistaken for a file inside 'gcc'.  An empty
    *folder* matches every path (only leading slashes are removed), and a
    *path* equal to *folder* yields the empty string.  Any other path is
    returned unchanged.
    """
    if not folder:
        return path.lstrip('/')
    if path == folder:
        return ''
    # Compare against 'folder/' so that only whole path components match.
    prefix = folder if folder.endswith('/') else folder + '/'
    if path.startswith(prefix):
        return path[len(prefix):].lstrip('/')
    return path
182 | ||
183 | ||
edf0c3ff TB |
def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Build the ChangeLog skeleton text for the patch in *data*.

    data -- patch contents handed to unidiff's PatchSet.
    no_functions -- when true, do not list changed function names.
    fill_pr_titles -- when true, prepend the titles returned by
        get_pr_titles for all collected PRs.
    additional_prs -- optional list of extra PR entries; entries that
        contain '/' and lack the 'PR ' prefix get it added.

    Returns the generated text.  As a side effect the module-level ``prs``
    list is reset and filled with the PR/DR entries that were found.
    """
    global prs
    prs = []

    changelogs = {}
    changelog_list = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines as later lines may
            # contain commented code with a note that it
            # has not been tested due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            if hunks:
                for line in hunks[0][0:10]:
                    m = pr_regex.search(line.value)
                    if m:
                        pr = m.group('pr')
                        if pr not in prs:
                            prs.append(pr)
                            this_file_prs.append(pr.split('/')[-1])
                    else:
                        m = dr_regex.search(line.value)
                        if m:
                            dr = m.group('dr')
                            if dr not in prs:
                                prs.append(dr)
                                this_file_prs.append(dr.split('/')[-1])
                        elif dg_regex.search(line.value):
                            # Found dg-warning/dg-error line
                            break

            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            functions = []
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog. Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Move to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Use the returned text: the helper does not modify 'out'
                # in place, and it also wraps entries exceeding LINE_LIMIT.
                out = append_changelog_line(out, relative_path, 'Regenerate.')
            else:
                if not no_functions:
                    for hunk in file:
                        # Do not add function names for testsuite files
                        extension = os.path.splitext(relative_path)[1]
                        if not in_tests and extension in function_extensions:
                            last_fn = None
                            modified_visited = False
                            success = False
                            for line in hunk:
                                m = identifier_regex.match(line.value)
                                if line.is_added or line.is_removed:
                                    # special-case definition in .md files
                                    m2 = md_def_regex.match(line.value)
                                    if extension == '.md' and m2:
                                        fn = m2.group(1)
                                        if fn not in functions:
                                            functions.append(fn)
                                            last_fn = None
                                            success = True

                                    if not line.value.strip():
                                        continue
                                    modified_visited = True
                                    if m and try_add_function(functions,
                                                              m.group(1)):
                                        last_fn = None
                                        success = True
                                elif line.is_context:
                                    if last_fn and modified_visited:
                                        try_add_function(functions, last_fn)
                                        last_fn = None
                                        modified_visited = False
                                        success = True
                                    elif m:
                                        last_fn = m.group(1)
                                        modified_visited = False
                            if not success:
                                # Fall back to the name in the hunk header.
                                try_add_function(functions,
                                                 hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
335 | ||
336 | ||
cf76bbf8 ML |
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    The entry header uses today's date and the user name and e-mail
    reported by ``git config``.  Every ChangeLog file belonging to a file
    of the patch in *data* is rewritten once, with the new entry placed in
    front of its previous content.
    """
    def git_config(key):
        # Ask git for one configuration value, without trailing whitespace.
        return subprocess.check_output(f'git config {key}', shell=True,
                                       encoding='utf8').strip()

    today = datetime.datetime.now().strftime('%Y-%m-%d')
    author = git_config('user.name')
    address = git_config('user.email')
    # NOTE(review): GNU ChangeLog headers conventionally separate date, name
    # and e-mail with two spaces - confirm the spacing of this header.
    entry = (f'{today} {author} <{address}>\n\n'
             '\tUpdate copyright years.\n\n')

    handled = set()
    for file in PatchSet(data):
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog in handled:
            continue
        handled.add(changelog)
        with open(changelog) as f:
            previous = f.read()
        with open(changelog, 'w+') as f:
            f.write(entry + previous)
357 | ||
358 | ||
def skip_line_in_changelog(line):
    """Return True while *line* is not the first line of the "end" part.

    Used as the takewhile predicate that collects the "start" of a commit
    message.
    """
    return not FIRST_LINE_OF_END_RE.match(line)
361 | ||
0684c8d3 | 362 | |
if __name__ == '__main__':
    # Extra command-line arguments may be supplied as a JSON list in the
    # GCC_MKLOG_ARGS environment variable.
    extra_args = os.getenv('GCC_MKLOG_ARGS')
    if extra_args:
        sys.argv += json.loads(extra_args)

    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    parser.add_argument('-a', '--append', action='store_true',
                        help='Append the generate ChangeLog to the patch file')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input, newline='\n') if args.input else sys.stdin
    output = None
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Release the patch file once it has been consumed; standard input
        # is left open.
        if data is not sys.stdin:
            data.close()

    if args.update_copyright:
        # Everything was already written by update_copyright.
        pass
    elif args.append:
        if (not args.input):
            raise Exception("`-a or --append` option not support standard "
                            "input")
        lines = []
        with open(args.input, 'r', newline='\n') as f:
            # 1 -> not find the possible start of diff log
            # 2 -> find the possible start of diff log
            # 3 -> finish add ChangeLog to the patch file
            maybe_diff_log = 1
            for line in f:
                if maybe_diff_log == 1 and line == "---\n":
                    # Hold this line back until the next one tells whether
                    # it really starts the diff log.
                    maybe_diff_log = 2
                elif (maybe_diff_log == 2 and
                      re.match(r"\s[^\s]+\s+\|\s+\d+\s[+\-]+\n", line)):
                    lines += [output, "---\n", line]
                    maybe_diff_log = 3
                else:
                    # the possible start is not the true start.
                    if maybe_diff_log == 2:
                        lines.append("---\n")
                        maybe_diff_log = 1
                    lines.append(line)
            if maybe_diff_log == 2:
                # The file ended on a held-back '---' line; keep it instead
                # of dropping it from the rewritten patch.
                lines.append("---\n")
        with open(args.input, "w") as f:
            f.writelines(lines)
    elif args.changelog:
        with open(args.changelog) as f:
            lines = f.read().split('\n')
        # 'start' is the message text before the first trailer/comment
        # line, 'end' is everything from that line on.
        start = list(takewhile(skip_line_in_changelog, lines))
        end = lines[len(start):]
        with open(args.changelog, 'w') as f:
            if not start or not start[0]:
                if len(prs) == 1:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(prs[0])
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
            if start:
                # append empty line
                if start[-1] != '':
                    start.append('')
            else:
                # append 2 empty lines
                start = 2 * ['']
            f.write('\n'.join(start))
            f.write('\n')
            f.write(output)
            f.write('\n'.join(end))
    else:
        print(output, end='')