]> git.ipfire.org Git - thirdparty/gcc.git/blob - contrib/mklog.py
Avoid assuming maximum string length is constant [PR102960].
[thirdparty/gcc.git] / contrib / mklog.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) 2020 Free Software Foundation, Inc.
4 #
5 # This file is part of GCC.
6 #
7 # GCC is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3, or (at your option)
10 # any later version.
11 #
12 # GCC is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING. If not, write to
19 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 # Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
26 #
27 # Author: Martin Liska <mliska@suse.cz>
28
29 import argparse
30 import datetime
31 import os
32 import re
33 import subprocess
34 import sys
35 from itertools import takewhile
36
37 import requests
38
39 from unidiff import PatchSet
40
# Maximum visual width of a generated ChangeLog line; a tab counts as
# TAB_WIDTH columns (see append_changelog_line).
LINE_LIMIT = 100
TAB_WIDTH = 8
# Lower-case prefix of the commit-message line at which the generated
# ChangeLog is inserted (see skip_line_in_changelog).
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# 'PR component/number' preceded by a comment leader: '//', '/*', or one of
# the single characters 'C', 'c', '*', '!', followed by whitespace.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into its component and number.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# 'DR number' preceded by the same comment leaders as pr_regex.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A '{ dg-error' or '{ dg-warning' directive; the PR scan stops there.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr' or 'PR' followed by at least four digits in a file name, at the start
# of the name or after a non-alphanumeric character (including '_').
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line that starts in column 0 with an identifier character or '#'.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with a '/*' comment opener.
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum declaration, with an optional GTY((...)) marker
# between the keyword and the name.
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEFxxx (NAME' style invocation; the first argument is captured.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name followed by '(' whose next character is not '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# An innermost '<...>' group, removed from function names.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"' construct in .md files; the quoted name is captured.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query returning the summary and component of one bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
    'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder is the resolved path of this script
# file itself; root is therefore the parent of the directory containing the
# script.  root can be overridden with --directory.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR found by generate_changelog; used to build the commit subject.
firstpr = ''
77
78
def find_changelog(path):
    """Return the directory of the nearest ChangeLog that covers PATH.

    Starting at the directory that contains ``path``, walk towards the top
    and return the first directory (spelled as in ``path``) for which
    ``<root>/<directory>/ChangeLog`` exists.  Return ``''`` when no ancestor
    directory has a ChangeLog.
    """
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # For a relative path dirname() eventually yields '' (a fixed point
        # of dirname); for an absolute path it bottoms out at '/'.  Stop at
        # either fixed point instead of looping forever.
        if parent == folder:
            return ''
        folder = parent
88
89
def extract_function_name(line):
    """Guess the name of the entity declared or defined on LINE.

    The checks are tried in this order and the first hit wins:
    struct-like declaration ('<keyword> <name>'), macro definition,
    DEFxxx (NAME ...) supermacro, then a function-like 'name ('.
    Lines starting a '/*' comment, and lines matching nothing, give None.
    """
    if comment_regex.match(line) is not None:
        return None

    struct = struct_regex.search(line)
    if struct is not None:
        # Struct declaration: keep the keyword, drop any GTY marker.
        return '%s %s' % (struct.group(1), struct.group(3))

    macro = macro_regex.search(line)
    if macro is not None:
        # Macro definition
        return macro.group(2)

    super_macro = super_macro_regex.search(line)
    if super_macro is not None:
        # Supermacro: the first argument is the interesting name.
        return super_macro.group(1)

    call = fn_regex.search(line)
    if call is None:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', call.group(1)).rstrip()
112
113
def try_add_function(functions, line):
    """Record the name extracted from LINE in FUNCTIONS.

    The name is appended only if it is not already listed.  Returns True
    when a name could be extracted from LINE (whether or not it was new),
    False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
119
120
def sort_changelog_files(changed_file):
    """Sort key for patched files: modified first, then removed, then added.

    The key is the pair (is_added_file, is_removed_file), so files with
    neither flag sort before removed files, which sort before added ones.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
123
124
def get_pr_titles(prs):
    """Return 'PR component/number - summary' lines for the given PRs.

    Each entry of PRS is looked up in Bugzilla.  When exactly one bug is
    returned, the entry in PRS is rewritten IN PLACE to the canonical
    'PR component/number' form and a title line is produced for it.
    'DR number' entries are not Bugzilla bugs and are skipped.

    Returns the title lines joined into one string (terminated by an empty
    line), or '' when no title could be retrieved.
    """
    # Never hang forever on an unresponsive server.
    timeout = 30
    output = []
    for idx, pr in enumerate(prs):
        if pr.startswith('DR '):
            continue
        pr_id = pr.split('/')[-1]
        # Entries without a component look like 'PR 12345'; only the
        # number is a valid Bugzilla id.
        if pr_id.startswith('PR '):
            pr_id = pr_id[len('PR '):]
        r = requests.get(bugzilla_url % pr_id, timeout=timeout)
        # An error response carries no 'bugs' list; treat it as no match.
        bugs = r.json().get('bugs', [])
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
139
140
def append_changelog_line(out, relative_path, text):
    """Return OUT extended with a '* path: text' ChangeLog entry.

    The entry is written on a single line when it fits within LINE_LIMIT
    columns (a tab counting as TAB_WIDTH columns); otherwise TEXT goes on
    a tab-indented continuation line of its own.
    """
    header = f'\t* {relative_path}:'
    # Width of "<header> <text>" with the tabs of the header expanded.
    width = len(header.replace('\t', ' ' * TAB_WIDTH)) + 1 + len(text)
    if width > LINE_LIMIT:
        return f'{out}{header}\n\t{text}\n'
    return f'{out}{header} {text}\n'
149
150
def get_rel_path_if_prefixed(path, folder):
    """Return PATH relative to FOLDER when PATH lies inside FOLDER.

    FOLDER is stripped only when it is a whole leading path component, so
    'gcc' is removed from 'gcc/foo.c' but not from 'gccrs/foo.c'.  PATH is
    returned unchanged when it is not located under FOLDER.
    """
    if not path.startswith(folder):
        return path
    rest = path[len(folder):]
    # A plain string-prefix match is not enough: the prefix must end on a
    # '/' boundary (or cover the whole path, or be empty).
    on_boundary = (not folder or folder.endswith('/')
                   or not rest or rest.startswith('/'))
    if not on_boundary:
        return path
    return rest.lstrip('/')
156
157
def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Build a skeleton ChangeLog text for the patch read from DATA.

    DATA is handed to unidiff's PatchSet.  The result starts with the list
    of PR/DR entries found (preceded by their Bugzilla titles when
    FILL_PR_TITLES is set), followed by one section per affected ChangeLog
    with an entry for every patched file.

    NO_FUNCTIONS suppresses the extraction of changed function names.
    ADDITIONAL_PRS is an optional list of PR identifiers to list first;
    entries containing '/' get a 'PR ' prefix when they lack one.

    Side effect: the module-level FIRSTPR is set to the first PR entry
    when at least one PR is found.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines, as later lines may contain
            # commented-out code with a note that it has not been tested
            # due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            # A newly added empty file has no hunk to scan.
            first_lines = hunks[0][0:10] if hunks else []
            for line in first_lines:
                m = pr_regex.search(line.value)
                if m:
                    pr = m.group('pr')
                    if pr not in prs:
                        prs.append(pr)
                        this_file_prs.append(pr.split('/')[-1])
                else:
                    m = dr_regex.search(line.value)
                    if m:
                        dr = m.group('dr')
                        if dr not in prs:
                            prs.append(dr)
                            this_file_prs.append(dr.split('/')[-1])
                    elif dg_regex.search(line.value):
                        # Found dg-warning/dg-error line
                        break
            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            functions = []
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Moved to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Use the common helper (and keep its result) so that an
                # over-long entry is wrapped like every other entry.
                out = append_changelog_line(out, relative_path,
                                            'Regenerate.')
            else:
                if not no_functions:
                    # Do not add function names for testsuite files
                    extension = os.path.splitext(relative_path)[1]
                    scan_functions = (not in_tests
                                      and extension in function_extensions)
                    for hunk in file:
                        if not scan_functions:
                            continue
                        # last_fn: most recent candidate seen on a context
                        # line; it is recorded once a modified line follows
                        # it and the next context line is reached.
                        last_fn = None
                        modified_visited = False
                        success = False
                        for line in hunk:
                            m = identifier_regex.match(line.value)
                            if line.is_added or line.is_removed:
                                # special-case definition in .md files
                                m2 = md_def_regex.match(line.value)
                                if extension == '.md' and m2:
                                    fn = m2.group(1)
                                    if fn not in functions:
                                        functions.append(fn)
                                        last_fn = None
                                        success = True

                                if not line.value.strip():
                                    continue
                                modified_visited = True
                                if m and try_add_function(functions,
                                                          m.group(1)):
                                    last_fn = None
                                    success = True
                            elif line.is_context:
                                if last_fn and modified_visited:
                                    try_add_function(functions, last_fn)
                                    last_fn = None
                                    modified_visited = False
                                    success = True
                                elif m:
                                    last_fn = m.group(1)
                                    modified_visited = False
                        if not success:
                            # Fall back to the function named in the hunk
                            # header.
                            try_add_function(functions,
                                             hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
308
309
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    For every file in the patch read from DATA, the governing ChangeLog is
    located with find_changelog and a new entry (today's date plus the
    name and e-mail from 'git config') is written in front of its existing
    content.  Each ChangeLog is updated at most once.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        # find_changelog looks the file up below ROOT, so open it there
        # too rather than relative to the current directory.
        changelog_path = os.path.join(root, changelog)
        with open(changelog_path) as f:
            content = f.read()
        with open(changelog_path, 'w') as f:
            # GNU ChangeLog header: date, name and e-mail are separated
            # by two spaces.
            f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
330
331
def skip_line_in_changelog(line):
    """Tell whether LINE belongs in front of the generated ChangeLog.

    Returns False for a 'Co-authored-by: ' trailer (matched
    case-insensitively) and for a line starting with '#'; True for any
    other line.
    """
    is_trailer = line.lower().startswith(CO_AUTHORED_BY_PREFIX)
    is_comment = line.startswith('#')
    return not (is_trailer or is_comment)
336
337
if __name__ == '__main__':
    # Command-line entry point: read a patch, then either update the
    # affected ChangeLog files (--update-copyright) or generate a ChangeLog
    # skeleton that is printed or inserted into a commit message file.
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    output = None
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file we opened, but never standard input.
        if data is not sys.stdin:
            data.close()

    if output is not None:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' comment or Co-authored-by
            # trailer stays in front of the generated ChangeLog.
            start = list(takewhile(skip_line_in_changelog, lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')