]> git.ipfire.org Git - thirdparty/gcc.git/blame - contrib/mklog.py
Update copyright years.
[thirdparty/gcc.git] / contrib / mklog.py
CommitLineData
00243d9a
ML
1#!/usr/bin/env python3
2
a945c346 3# Copyright (C) 2020-2024 Free Software Foundation, Inc.
00243d9a
ML
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING. If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
22# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
23# and adds a skeleton ChangeLog file to the file. It does not try to be
24# too smart when parsing function names, but it produces a reasonable
25# approximation.
26#
27# Author: Martin Liska <mliska@suse.cz>
28
29import argparse
cf76bbf8 30import datetime
18ef76d3 31import json
00243d9a
ML
32import os
33import re
cf76bbf8 34import subprocess
00243d9a 35import sys
757dbb59 36from itertools import takewhile
00243d9a 37
4f85a52c
ML
38import requests
39
00243d9a
ML
40from unidiff import PatchSet
41
b838641b
ML
# Widest ChangeLog line that is still emitted on a single line, and the
# number of columns a tab character counts for when measuring that width.
LINE_LIMIT = 100
TAB_WIDTH = 8

# How a ChangeLog is spliced into an existing commit message.
#
# Initial commit:
# +--------------------------------------------------+
# | gccrs: Some title                                |
# |                                                  | This is the "start"
# | This is some text explaining the commit.         |
# | There can be several lines.                      |
# |                                                  |<------------------->
# | Signed-off-by: My Name <my@mail.com>             | This is the "end"
# +--------------------------------------------------+
#
# Results in:
# +--------------------------------------------------+
# | gccrs: Some title                                |
# |                                                  |
# | This is some text explaining the commit.         | This is the "start"
# | There can be several lines.                      |
# |                                                  |<------------------->
# | gcc/rust/ChangeLog:                              |
# |                                                  | This is the generated
# | * some_file (bla):                               | ChangeLog part
# | (foo):                                           |
# |                                                  |<------------------->
# | Signed-off-by: My Name <my@mail.com>             | This is the "end"
# +--------------------------------------------------+

# Matches the first line of the "end" part of the initial commit message:
# a trailer (case-insensitive) or a '#' comment line.
FIRST_LINE_OF_END_RE = re.compile(
    '(?i)^(signed-off-by:|co-authored-by:|#)')
b838641b 72
# --- PR / DR references -----------------------------------------------------
# 'PR component/number' or 'DR number' following a comment leader
# ('//', '/*', 'C', 'c', '*' or '!').
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# Splits an already extracted 'PR component/number' into its two parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# A dg-error / dg-warning directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# A PR number (at least four digits) embedded in a file name.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')

# --- Function-name heuristics -----------------------------------------------
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
comment_regex = re.compile(r'^\/\*')
struct_regex = re.compile(
    r'^(class|struct|union|enum)\s+'
    r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
template_and_param_regex = re.compile(r'<[^<>]*>')
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')

# --- Bugzilla ---------------------------------------------------------------
# REST query template; '%s' is replaced by the bug id.
bugzilla_url = ('https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&'
                'include_fields=summary,component')
00243d9a 89
# File extensions for which function names are extracted from hunks.
function_extensions = {
    '.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md',
}

# Files reported as regenerated rather than edited.
# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Description shown by the command-line help.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# Despite its name, script_folder is the resolved path of this script file;
# root is the directory two levels above it, under which ChangeLog files
# are looked up unless overridden on the command line.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))
00243d9a
ML
103
104
def find_changelog(path):
    """Return the nearest directory above PATH that contains a ChangeLog.

    The search starts at the directory part of PATH and walks towards the
    top; each candidate is checked for a 'ChangeLog' file under the global
    'root'.  When the walk reaches the empty string without a hit, the
    empty string is returned.
    """
    candidate = os.path.dirname(path)
    while not os.path.exists(os.path.join(root, candidate, 'ChangeLog')):
        candidate = os.path.dirname(candidate)
        if candidate == '':
            # Ran out of parent directories: fall back to the top level.
            break
    return candidate
114
115
def extract_function_name(line):
    """Guess the name of the entity declared on LINE.

    Returns 'keyword name' for a class/struct/union/enum, the macro name
    for #define/#undef, the first argument of a DEF* super-macro, or the
    function name (with template arguments stripped).  Returns None for
    lines that start a comment or match none of the patterns.
    """
    if comment_regex.match(line):
        return None

    declared = struct_regex.search(line)
    if declared:
        # Struct declaration
        keyword, name = declared.group(1, 3)
        return keyword + ' ' + name

    # Macro definition first, then supermacro; the number is the capture
    # group that holds the name.
    for pattern, name_group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = pattern.search(line)
        if found:
            return found.group(name_group)

    call = fn_regex.search(line)
    if call is None:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', call.group(1)).rstrip()
138
139
def try_add_function(functions, line):
    """Record the function name found on LINE in the list FUNCTIONS.

    The name is appended only if it is not already present.  Returns True
    when a name could be extracted from LINE (even if it was a duplicate),
    False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
145
146
def sort_changelog_files(changed_file):
    """Sort key for changed files: (is_added_file, is_removed_file).

    Because False sorts before True, files that are neither added nor
    removed come first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return (added, removed)
149
150
def get_pr_titles(prs):
    """Fetch the Bugzilla summary for each entry of PRS.

    For every entry the part after the last '/' is used as the bug id in
    a Bugzilla REST query.  When exactly one bug is returned, the entry in
    PRS is rewritten in place as 'PR <component>/<id>' using the reported
    component, and a '<entry> - <summary>' line is collected (duplicates
    are skipped).  Returns the collected lines joined by newlines, or the
    empty string if nothing was found.
    """
    titles = []
    for position, entry in enumerate(prs):
        bug_id = entry.split('/')[-1]
        response = requests.get(bugzilla_url % bug_id)
        found = response.json()['bugs']
        if len(found) != 1:
            continue
        bug = found[0]
        prs[position] = 'PR %s/%s' % (bug['component'], bug_id)
        title = '%s - %s\n' % (prs[position], bug['summary'])
        if title not in titles:
            titles.append(title)
    if titles:
        titles.append('')
    return '\n'.join(titles)
00243d9a 165
4f85a52c 166
b838641b
ML
def append_changelog_line(out, relative_path, text):
    """Return OUT extended by a '\\t* RELATIVE_PATH: TEXT' entry.

    The entry is kept on one line when it fits into LINE_LIMIT columns
    (tabs counted as TAB_WIDTH spaces); otherwise TEXT is moved to a
    tab-indented line of its own.
    """
    prefix = f'\t* {relative_path}:'
    single_line = prefix.replace('\t', ' ' * TAB_WIDTH) + ' ' + text
    if len(single_line) > LINE_LIMIT:
        return out + f'{prefix}\n' + f'\t{text}\n'
    return out + f'{prefix} {text}\n'
175
176
e370a248
ML
def get_rel_path_if_prefixed(path, folder):
    """Return PATH with a leading FOLDER (and following slashes) removed.

    PATH is returned unchanged when it does not start with FOLDER.
    """
    if not path.startswith(folder):
        return path
    remainder = path[len(folder):]
    return remainder.lstrip('/')
182
183
edf0c3ff
TB
def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch read from DATA.

    DATA is handed to unidiff's PatchSet.  Changed files are grouped by
    the ChangeLog directory that find_changelog reports for them.

    no_functions    -- if true, do not list function names for modified
                       files.
    fill_pr_titles  -- if true, prepend the bug titles returned by
                       get_pr_titles.
    additional_prs  -- optional iterable of extra PR entries; an entry
                       containing '/' but lacking the 'PR ' prefix gets
                       the prefix added.

    Side effect: the module-level list 'prs' is reset and filled with the
    PR/DR entries that were found; the command-line driver reads it after
    this function returns.
    """
    global prs
    prs = []

    changelogs = {}
    changelog_list = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines, as later lines may contain
            # commented-out code with a note that it has not been tested
            # due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            if hunks:
                for line in hunks[0][0:10]:
                    m = pr_regex.search(line.value)
                    if m:
                        pr = m.group('pr')
                        if pr not in prs:
                            prs.append(pr)
                            this_file_prs.append(pr.split('/')[-1])
                    else:
                        m = dr_regex.search(line.value)
                        if m:
                            dr = m.group('dr')
                            if dr not in prs:
                                prs.append(dr)
                                this_file_prs.append(dr.split('/')[-1])
                        elif dg_regex.search(line.value):
                            # Found dg-warning/dg-error line
                            break

            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            functions = []
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Moved to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Use the shared helper (and keep its result) so that long
                # paths are wrapped at LINE_LIMIT like every other entry.
                out = append_changelog_line(out, relative_path, 'Regenerate.')
            else:
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files or for
                # file types we cannot extract function names from.
                scan_hunks = (not no_functions and not in_tests
                              and extension in function_extensions)
                if scan_hunks:
                    for hunk in file:
                        # Last identifier seen on a context line; it is
                        # the enclosing function candidate for the
                        # modified lines that follow it.
                        last_fn = None
                        modified_visited = False
                        success = False
                        for line in hunk:
                            m = identifier_regex.match(line.value)
                            if line.is_added or line.is_removed:
                                # special-case definition in .md files
                                m2 = md_def_regex.match(line.value)
                                if extension == '.md' and m2:
                                    fn = m2.group(1)
                                    if fn not in functions:
                                        functions.append(fn)
                                        last_fn = None
                                        success = True

                                if not line.value.strip():
                                    continue
                                modified_visited = True
                                if m and try_add_function(functions,
                                                          m.group(1)):
                                    last_fn = None
                                    success = True
                            elif line.is_context:
                                if last_fn and modified_visited:
                                    try_add_function(functions, last_fn)
                                    last_fn = None
                                    modified_visited = False
                                    success = True
                                elif m:
                                    last_fn = m.group(1)
                                    modified_visited = False
                        if not success:
                            # Fall back to the function named in the
                            # hunk header.
                            try_add_function(functions,
                                             hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
335
336
cf76bbf8
ML
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the touched ChangeLogs.

    DATA is parsed with unidiff's PatchSet.  For each changed file the
    owning ChangeLog (as located by find_changelog) is rewritten once with
    a new entry at the top, dated today and attributed to the user.name /
    user.email reported by 'git config'.

    Note that the ChangeLog path is opened as given by find_changelog,
    i.e. relative to the current working directory.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output('git config user.name', shell=True,
                                       encoding='utf8').strip()
    email = subprocess.check_output('git config user.email', shell=True,
                                    encoding='utf8').strip()
    # GNU ChangeLog header lines separate the date, the name and the
    # e-mail address by two spaces each.
    header = f'{current_timestamp}  {username}  <{email}>\n\n'

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog in changelogs:
            # Already updated for an earlier file of this patch.
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w+') as f:
            f.write(header)
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
357
358
def skip_line_in_changelog(line):
    """Return True while LINE does not start the "end" of a commit message.

    The "end" begins at the first line matched by FIRST_LINE_OF_END_RE.
    """
    return not FIRST_LINE_OF_END_RE.match(line)
361
0684c8d3 362
if __name__ == '__main__':
    # Extra command-line arguments may be supplied as a JSON list in the
    # GCC_MKLOG_ARGS environment variable.
    extra_args = os.getenv('GCC_MKLOG_ARGS')
    if extra_args:
        sys.argv += json.loads(extra_args)

    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    parser.add_argument('-a', '--append', action='store_true',
                        help='Append the generate ChangeLog to the patch file')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input, newline='\n') if args.input else sys.stdin
    output = None
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file once it has been parsed; it may be
        # re-opened below.  Never close standard input.
        if data is not sys.stdin:
            data.close()

    if args.update_copyright:
        pass
    elif args.append:
        if (not args.input):
            raise Exception("`-a or --append` option not support standard "
                            "input")
        lines = []
        with open(args.input, 'r', newline='\n') as f:
            # 1 -> not find the possible start of diff log
            # 2 -> find the possible start of diff log
            # 3 -> finish add ChangeLog to the patch file
            maybe_diff_log = 1
            for line in f:
                if maybe_diff_log == 1 and line == "---\n":
                    # Hold the '---' line back until the next line tells
                    # whether a diffstat follows.
                    maybe_diff_log = 2
                elif (maybe_diff_log == 2 and
                      re.match(r"\s[^\s]+\s+\|\s+\d+\s[+\-]+\n", line)):
                    lines += [output, "---\n", line]
                    maybe_diff_log = 3
                else:
                    # the possible start is not the true start.
                    if maybe_diff_log == 2:
                        lines.append("---\n")
                        maybe_diff_log = 1
                    lines.append(line)
            if maybe_diff_log == 2:
                # The file ended right after a candidate '---' line; emit
                # it so that the line is not lost.
                lines.append("---\n")
        with open(args.input, "w") as f:
            f.writelines(lines)
    elif args.changelog:
        with open(args.changelog) as f:
            lines = f.read().split('\n')
        # 'start' is the message text before the first trailer/comment
        # line; 'end' is everything from that line on.
        start = list(takewhile(skip_line_in_changelog, lines))
        end = lines[len(start):]
        with open(args.changelog, 'w') as f:
            if not start or not start[0]:
                if len(prs) == 1:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(prs[0])
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
            if start:
                # append empty line
                if start[-1] != '':
                    start.append('')
            else:
                # append 2 empty lines
                start = 2 * ['']
            f.write('\n'.join(start))
            f.write('\n')
            f.write(output)
            f.write('\n'.join(end))
    else:
        print(output, end='')