[thirdparty/gcc.git] / contrib / mklog.py

#!/usr/bin/env python3

# Copyright (C) 2020-2024 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>

import argparse
import datetime
import json
import os
import re
import subprocess
import sys
from itertools import takewhile

import requests

from unidiff import PatchSet

# Maximum width of a generated "<tab>* file: text" ChangeLog line; entries
# that would be longer are split over two lines by append_changelog_line.
LINE_LIMIT = 100
# Number of columns a tab character counts for when measuring that width.
TAB_WIDTH = 8

# Initial commit:
#   +--------------------------------------------------+
#   | gccrs: Some title                                |
#   |                                                  | This is the "start"
#   | This is some text explaining the commit.         |
#   | There can be several lines.                      |
#   |                                                  |<------------------->
#   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
#   +--------------------------------------------------+
#
# Results in:
#   +--------------------------------------------------+
#   | gccrs: Some title                                |
#   |                                                  |
#   | This is some text explaining the commit.         | This is the "start"
#   | There can be several lines.                      |
#   |                                                  |<------------------->
#   | gcc/rust/ChangeLog:                              |
#   |                                                  | This is the generated
#   |         * some_file (bla):                       | ChangeLog part
#   |         (foo):                                   |
#   |                                                  |<------------------->
#   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
#   +--------------------------------------------------+

# this regex matches the first line of the "end" in the initial commit message
# (case-insensitively: a "Signed-off-by:" or "Co-authored-by:" trailer, or a
# line starting with '#')
FIRST_LINE_OF_END_RE = re.compile('(?i)^(signed-off-by:|co-authored-by:|#)')

# "PR <component>/<number>" preceded by '//', '/*', 'C', 'c', '*' or '!' and
# whitespace; the named group 'pr' captures the "PR ..." text.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits "PR <component>/<number>" into the named groups 'comp' and 'num'.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same leader as pr_regex, followed by "DR <number>" (named group 'dr').
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A "{ dg-error" or "{ dg-warning" directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# "pr"/"PR" followed by at least four digits, at the start of the string or
# after a non-word character or underscore; group 'pr' is the number.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#' (i.e. not
# indented); the whole line is captured.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with "/*".
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum keyword at line start, an optional GTY(...) marker,
# then the tag name (groups 1 and 3 are the keyword and the name).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# "#define NAME" or "#undef NAME"; group 2 is NAME.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# "DEF<UPPERCASE>(NAME" at line start; group 1 is the first argument.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# An identifier-like token followed by '(' whose next character is not '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# An innermost "<...>" group (no nested angle brackets).
template_and_param_regex = re.compile(r'<[^<>]*>')
# "(define... "NAME"" as matched at the start of a line; group 1 is NAME.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query template; %s is replaced by the bug id and only the
# 'summary' and 'component' fields are requested.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# File extensions for which generate_changelog tries to list function names.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# Base names that generate_changelog reports as "Regenerate."
# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Description text handed to argparse.ArgumentParser.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder is the resolved path of this script
# file itself, so two dirname() calls yield the parent of the directory that
# contains the script.  find_changelog looks for ChangeLog files below 'root';
# the '-d/--directory' option overrides it.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))


def find_changelog(path):
    """Return the closest ancestor folder of PATH that owns a ChangeLog.

    Starting from the directory part of PATH, walk towards the top and
    return the first folder for which '<root>/<folder>/ChangeLog' exists.
    The empty string is returned when no such folder is found, which
    callers treat as the top-level directory.
    """
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # For an absolute path (e.g. '/dev/null') dirname() eventually
        # returns '/' forever instead of '', so also stop once the walk
        # no longer makes progress; otherwise this loop never terminates.
        if parent == '' or parent == folder:
            return ''
        folder = parent


def extract_function_name(line):
    """Return the name to mention in the ChangeLog for LINE, or None.

    The checks are tried in this order: lines opening a '/*' comment are
    rejected; then a struct-like declaration, a macro definition, a
    DEF*(...) supermacro and finally a function-like token are looked for.
    """
    if comment_regex.match(line):
        return None

    struct_match = struct_regex.search(line)
    if struct_match:
        # Struct declaration: keep the keyword together with the tag name.
        keyword, tag = struct_match.group(1, 3)
        return keyword + ' ' + tag

    macro_match = macro_regex.search(line)
    if macro_match:
        # Macro definition: only the macro name is reported.
        return macro_match.group(2)

    super_macro_match = super_macro_regex.search(line)
    if super_macro_match:
        # Supermacro: report its first argument.
        return super_macro_match.group(1)

    fn_match = fn_regex.search(line)
    if not fn_match:
        return None
    # Discard template and function parameters.
    stripped = template_and_param_regex.sub('', fn_match.group(1))
    return stripped.rstrip()


def try_add_function(functions, line):
    """Record the function name found in LINE, if any.

    The name is appended to the FUNCTIONS list unless it is already there.
    Returns True when LINE yielded a name (even a duplicate one) and False
    otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True


def sort_changelog_files(changed_file):
    """Sort key for patched files: modified, then removed, then added."""
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed


def get_pr_titles(prs):
    """Fetch the Bugzilla summaries for the entries of PRS.

    Every entry is looked up through the Bugzilla REST interface.  When
    exactly one bug is returned, the entry in PRS is rewritten in place as
    'PR <component>/<id>' and a '<entry> - <summary>' line is collected.
    Returns the collected lines joined into one string (empty when nothing
    was found).
    """
    titles = []
    for position, entry in enumerate(prs):
        bug_id = entry.split('/')[-1]
        response = requests.get(bugzilla_url % bug_id)
        found = response.json()['bugs']
        if len(found) != 1:
            continue
        bug = found[0]
        # Normalise the caller's list so it carries the real component.
        prs[position] = 'PR %s/%s' % (bug['component'], bug_id)
        title = '%s - %s\n' % (prs[position], bug['summary'])
        if title not in titles:
            titles.append(title)
    if titles:
        titles.append('')
    return '\n'.join(titles)


def append_changelog_line(out, relative_path, text):
    """Return OUT extended with a '* RELATIVE_PATH: TEXT' entry.

    The entry is kept on one line when it fits into LINE_LIMIT columns
    (with the leading tab counted as TAB_WIDTH columns); otherwise TEXT is
    moved to a tab-indented line of its own.
    """
    header = f'\t* {relative_path}:'
    expanded_width = len(header.replace('\t', ' ' * TAB_WIDTH))
    # One extra column for the space separating the header from TEXT.
    if expanded_width + 1 + len(text) > LINE_LIMIT:
        return out + f'{header}\n' + f'\t{text}\n'
    return out + f'{header} {text}\n'


def get_rel_path_if_prefixed(path, folder):
    """Return PATH relative to FOLDER when PATH lies inside FOLDER.

    PATH is returned unchanged when FOLDER is not one of its leading
    directories.  An empty FOLDER matches every path; in that case only
    leading slashes are removed.
    """
    if not path.startswith(folder):
        return path
    remainder = path[len(folder):]
    # A plain string prefix is not enough: 'gcc' must not be stripped from
    # 'gcc-extra/a.c'.  The prefix has to end on a path-component boundary.
    on_boundary = (not folder or folder.endswith('/')
                   or not remainder or remainder.startswith('/'))
    if not on_boundary:
        return path
    return remainder.lstrip('/')


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return the ChangeLog skeleton text for the patch in DATA.

    DATA is handed to unidiff's PatchSet (the command-line driver passes an
    open patch file or sys.stdin).  When NO_FUNCTIONS is true no function
    names are listed.  When FILL_PR_TITLES is true the PR titles are
    downloaded with get_pr_titles.  ADDITIONAL_PRS is an optional list of
    extra PR entries to mention.

    Side effect: the module-level list 'prs' is reset and filled with the
    PR/DR entries that were found; the command-line driver reads it later.
    """
    global prs
    prs = []

    changelogs = {}
    changelog_list = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines, as later lines may contain
            # commented-out code with a note that it has not been tested
            # due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            if hunks:
                for line in hunks[0][0:10]:
                    m = pr_regex.search(line.value)
                    if m:
                        pr = m.group('pr')
                        if pr not in prs:
                            prs.append(pr)
                            this_file_prs.append(pr.split('/')[-1])
                    else:
                        m = dr_regex.search(line.value)
                        if m:
                            dr = m.group('dr')
                            if dr not in prs:
                                prs.append(dr)
                                this_file_prs.append(dr.split('/')[-1])
                        elif dg_regex.search(line.value):
                            # Found dg-warning/dg-error line
                            break

            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            functions = []
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Move to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Go through append_changelog_line like the other branches
                # so that over-long entries are wrapped; its return value
                # must be kept, since strings are immutable.
                out = append_changelog_line(out, relative_path,
                                            'Regenerate.')
            else:
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                scan_functions = (not no_functions and not in_tests
                                  and extension in function_extensions)
                if scan_functions:
                    for hunk in file:
                        # last_fn: most recent candidate seen on a context
                        # line; modified_visited: a changed line was seen
                        # since then; success: this hunk produced a name.
                        last_fn = None
                        modified_visited = False
                        success = False
                        for line in hunk:
                            m = identifier_regex.match(line.value)
                            if line.is_added or line.is_removed:
                                # special-case definition in .md files
                                m2 = md_def_regex.match(line.value)
                                if extension == '.md' and m2:
                                    fn = m2.group(1)
                                    if fn not in functions:
                                        functions.append(fn)
                                        last_fn = None
                                        success = True

                                if not line.value.strip():
                                    continue
                                modified_visited = True
                                if m and try_add_function(functions,
                                                          m.group(1)):
                                    last_fn = None
                                    success = True
                            elif line.is_context:
                                if last_fn and modified_visited:
                                    try_add_function(functions, last_fn)
                                    last_fn = None
                                    modified_visited = False
                                    success = True
                                elif m:
                                    last_fn = m.group(1)
                                    modified_visited = False
                        if not success:
                            # Fall back to the hunk's section header.
                            try_add_function(functions,
                                             hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out


def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the ChangeLog files.

    DATA is parsed with unidiff's PatchSet.  For every file in the patch
    the owning ChangeLog is determined with find_changelog, and each such
    ChangeLog is rewritten once with a new dated entry on top.  The author
    name and e-mail address come from 'git config'.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output('git config user.name', shell=True,
                                       encoding='utf8').strip()
    email = subprocess.check_output('git config user.email', shell=True,
                                    encoding='utf8').strip()

    already_updated = set()
    for patched in PatchSet(data):
        target = os.path.join(find_changelog(patched.path), 'ChangeLog')
        if target in already_updated:
            continue
        already_updated.add(target)
        # Read the old text first: the file is truncated when reopened.
        with open(target) as f:
            previous = f.read()
        with open(target, 'w+') as f:
            f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
            f.write('\tUpdate copyright years.\n\n')
            f.write(previous)


def skip_line_in_changelog(line):
    """Return True while LINE is not the first line of the message "end".

    Used as the takewhile() predicate that splits a commit message into its
    "start" and "end" parts.
    """
    return not FIRST_LINE_OF_END_RE.match(line)


if __name__ == '__main__':
    # Extra command-line arguments can be supplied as a JSON list in the
    # GCC_MKLOG_ARGS environment variable; they are appended to sys.argv.
    extra_args = os.getenv('GCC_MKLOG_ARGS')
    if extra_args:
        sys.argv += json.loads(extra_args)

    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    parser.add_argument('-a', '--append', action='store_true',
                        help='Append the generate ChangeLog to the patch file')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input, newline='\n') if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
            output = None
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Release the patch file once it has been parsed (it may be
        # rewritten below in --append mode); never close standard input.
        if args.input:
            data.close()

    if output is None:
        # --update-copyright already rewrote the ChangeLog files.
        pass
    elif args.append:
        if (not args.input):
            raise Exception("`-a or --append` option not support standard "
                            "input")
        lines = []
        with open(args.input, 'r', newline='\n') as f:
            # 1 -> not find the possible start of diff log
            # 2 -> find the possible start of diff log
            # 3 -> finish add ChangeLog to the patch file
            maybe_diff_log = 1
            for line in f:
                if maybe_diff_log == 1 and line == "---\n":
                    # Hold the separator back until the next line tells
                    # whether a diffstat follows.
                    maybe_diff_log = 2
                elif (maybe_diff_log == 2 and
                      re.match(r"\s[^\s]+\s+\|\s+\d+\s[+\-]+\n", line)):
                    lines += [output, "---\n", line]
                    maybe_diff_log = 3
                else:
                    # the possible start is not the true start.
                    if maybe_diff_log == 2:
                        lines.append("---\n")
                        maybe_diff_log = 1
                    lines.append(line)
            # A separator held back on the very last line must not be
            # dropped from the rewritten patch file.
            if maybe_diff_log == 2:
                lines.append("---\n")
        with open(args.input, "w") as f:
            f.writelines(lines)
    elif args.changelog:
        with open(args.changelog) as f:
            lines = f.read().split('\n')
        # "start" is everything before the first trailer/comment line,
        # "end" is the rest; the ChangeLog goes in between.
        start = list(takewhile(skip_line_in_changelog, lines))
        end = lines[len(start):]
        with open(args.changelog, 'w') as f:
            if not start or not start[0]:
                if len(prs) == 1:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(prs[0])
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
            if start:
                # append empty line
                if start[-1] != '':
                    start.append('')
            else:
                # append 2 empty lines
                start = 2 * ['']
            f.write('\n'.join(start))
            f.write('\n')
            f.write(output)
            f.write('\n'.join(end))
    else:
        print(output, end='')
Commit	Line	Data
00243d9a ML	1	#!/usr/bin/env python3
00243d9a ML	2
a945c346	3	# Copyright (C) 2020-2024 Free Software Foundation, Inc.
00243d9a ML	4	#
	5	# This file is part of GCC.
	6	#
	7	# GCC is free software; you can redistribute it and/or modify
	8	# it under the terms of the GNU General Public License as published by
	9	# the Free Software Foundation; either version 3, or (at your option)
	10	# any later version.
	11	#
	12	# GCC is distributed in the hope that it will be useful,
	13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	# GNU General Public License for more details.
	16	#
	17	# You should have received a copy of the GNU General Public License
	18	# along with GCC; see the file COPYING. If not, write to
	19	# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
	20	# Boston, MA 02110-1301, USA.
	21
	22	# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
	23	# and adds a skeleton ChangeLog file to the file. It does not try to be
	24	# too smart when parsing function names, but it produces a reasonable
	25	# approximation.
	26	#
	27	# Author: Martin Liska <mliska@suse.cz>
	28
	29	import argparse
cf76bbf8	30	import datetime
18ef76d3	31	import json
00243d9a ML	32	import os
00243d9a ML	33	import re
cf76bbf8	34	import subprocess
00243d9a	35	import sys
757dbb59	36	from itertools import takewhile
00243d9a	37
4f85a52c ML	38	import requests
4f85a52c ML	39
00243d9a ML	40	from unidiff import PatchSet
00243d9a ML	41
b838641b ML	42	LINE_LIMIT = 100
b838641b ML	43	TAB_WIDTH = 8
f3f6ff7b MP	44
	45	# Initial commit:
	46	# +--------------------------------------------------+
	47	# \| gccrs: Some title \|
	48	# \| \| This is the "start"
	49	# \| This is some text explaining the commit. \|
	50	# \| There can be several lines. \|
	51	# \| \|<------------------->
	52	# \| Signed-off-by: My Name <my@mail.com> \| This is the "end"
	53	# +--------------------------------------------------+
	54	#
	55	# Results in:
	56	# +--------------------------------------------------+
	57	# \| gccrs: Some title \|
	58	# \| \|
	59	# \| This is some text explaining the commit. \| This is the "start"
	60	# \| There can be several lines. \|
	61	# \| \|<------------------->
	62	# \| gcc/rust/ChangeLog: \|
	63	# \| \| This is the generated
	64	# \| * some_file (bla): \| ChangeLog part
	65	# \| (foo): \|
	66	# \| \|<------------------->
	67	# \| Signed-off-by: My Name <my@mail.com> \| This is the "end"
	68	# +--------------------------------------------------+
	69
	70	# this regex matches the first line of the "end" in the initial commit message
	71	FIRST_LINE_OF_END_RE = re.compile('(?i)^(signed-off-by:\|co-authored-by:\|#)')
b838641b	72
00243d9a	73	pr_regex = re.compile(r'(\/(\/\|\)\|[Cc!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
58e3b17f	74	prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
e7c7cdc5	75	dr_regex = re.compile(r'(\/(\/\|\)\|[Cc!])\s+(?P<dr>DR [0-9]+)')
ed5ae55e	76	dg_regex = re.compile(r'{\s+dg-(error\|warning)')
edf0c3ff	77	pr_filename_regex = re.compile(r'(^\|[\W_])[Pp][Rr](?P<pr>\d{4,})')
00243d9a ML	78	identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
00243d9a ML	79	comment_regex = re.compile(r'^\/\*')
4f85a52c ML	80	struct_regex = re.compile(r'^(class\|struct\|union\|enum)\s+'
4f85a52c ML	81	r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
00243d9a ML	82	macro_regex = re.compile(r'#\s*(define\|undef)\s+([a-zA-Z0-9_]+)')
	83	super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
	84	fn_regex = re.compile(r'([a-zA-Z_][^()\s])\s\([^*]')
	85	template_and_param_regex = re.compile(r'<[^<>]*>')
a2d04f3d	86	md_def_regex = re.compile(r'\(define.\s+"(.)"')
25c284f1	87	bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
edf0c3ff	88	'include_fields=summary,component'
00243d9a	89
a2d04f3d	90	function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
00243d9a	91
432b9f61 MF	92	# NB: Makefile.in isn't listed as it's not always generated.
	93	generated_files = {'aclocal.m4', 'config.h.in', 'configure'}
	94
00243d9a ML	95	help_message = """\
	96	Generate ChangeLog template for PATCH.
	97	PATCH must be generated using diff(1)'s -up or -cp options
	98	(or their equivalent in git).
	99	"""
	100
	101	script_folder = os.path.realpath(__file__)
e54da1b6	102	root = os.path.dirname(os.path.dirname(script_folder))
00243d9a ML	103
	104
	105	def find_changelog(path):
	106	folder = os.path.split(path)[0]
	107	while True:
e54da1b6	108	if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
00243d9a ML	109	return folder
	110	folder = os.path.dirname(folder)
	111	if folder == '':
	112	return folder
	113	raise AssertionError()
	114
	115
	116	def extract_function_name(line):
	117	if comment_regex.match(line):
	118	return None
	119	m = struct_regex.search(line)
	120	if m:
	121	# Struct declaration
4f85a52c	122	return m.group(1) + ' ' + m.group(3)
00243d9a ML	123	m = macro_regex.search(line)
	124	if m:
	125	# Macro definition
	126	return m.group(2)
	127	m = super_macro_regex.search(line)
	128	if m:
	129	# Supermacro
	130	return m.group(1)
	131	m = fn_regex.search(line)
	132	if m:
	133	# Discard template and function parameters.
	134	fn = m.group(1)
	135	fn = re.sub(template_and_param_regex, '', fn)
	136	return fn.rstrip()
	137	return None
	138
	139
	140	def try_add_function(functions, line):
	141	fn = extract_function_name(line)
	142	if fn and fn not in functions:
	143	functions.append(fn)
	144	return bool(fn)
	145
	146
	147	def sort_changelog_files(changed_file):
	148	return (changed_file.is_added_file, changed_file.is_removed_file)
	149
	150
	151	def get_pr_titles(prs):
edf0c3ff TB	152	output = []
edf0c3ff TB	153	for idx, pr in enumerate(prs):
d6d9561c ML	154	pr_id = pr.split('/')[-1]
d6d9561c ML	155	r = requests.get(bugzilla_url % pr_id)
25c284f1 ML	156	bugs = r.json()['bugs']
25c284f1 ML	157	if len(bugs) == 1:
edf0c3ff TB	158	prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
	159	out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
	160	if out not in output:
	161	output.append(out)
25c284f1	162	if output:
edf0c3ff TB	163	output.append('')
edf0c3ff TB	164	return '\n'.join(output)
00243d9a	165
4f85a52c	166
b838641b ML	167	def append_changelog_line(out, relative_path, text):
	168	line = f'\t* {relative_path}:'
	169	if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
	170	out += f'{line} {text}\n'
	171	else:
	172	out += f'{line}\n'
	173	out += f'\t{text}\n'
	174	return out
	175
	176
e370a248 ML	177	def get_rel_path_if_prefixed(path, folder):
	178	if path.startswith(folder):
	179	return path[len(folder):].lstrip('/')
	180	else:
	181	return path
	182
	183
edf0c3ff TB	184	def generate_changelog(data, no_functions=False, fill_pr_titles=False,
edf0c3ff TB	185	additional_prs=None):
03c0b064 ML	186	global prs
	187	prs = []
	188
00243d9a ML	189	changelogs = {}
00243d9a ML	190	changelog_list = []
00243d9a ML	191	out = ''
	192	diff = PatchSet(data)
	193
edf0c3ff	194	if additional_prs:
84f906df ML	195	for apr in additional_prs:
	196	if not apr.startswith('PR ') and '/' in apr:
	197	apr = 'PR ' + apr
	198	if apr not in prs:
	199	prs.append(apr)
00243d9a	200	for file in diff:
236d6a33 ML	201	# skip files that can't be parsed
	202	if file.path == '/dev/null':
	203	continue
00243d9a ML	204	changelog = find_changelog(file.path)
	205	if changelog not in changelogs:
	206	changelogs[changelog] = []
	207	changelog_list.append(changelog)
	208	changelogs[changelog].append(file)
	209
	210	# Extract PR entries from newly added tests
	211	if 'testsuite' in file.path and file.is_added_file:
ed5ae55e TB	212	# Only search first ten lines as later lines may
	213	# contains commented code which a note that it
	214	# has not been tested due to a certain PR or DR.
edf0c3ff	215	this_file_prs = []
e1652147 ML	216	hunks = list(file)
	217	if hunks:
	218	for line in hunks[0][0:10]:
14d0f82c	219	m = pr_regex.search(line.value)
e7c7cdc5	220	if m:
14d0f82c ML	221	pr = m.group('pr')
	222	if pr not in prs:
	223	prs.append(pr)
	224	this_file_prs.append(pr.split('/')[-1])
	225	else:
	226	m = dr_regex.search(line.value)
	227	if m:
	228	dr = m.group('dr')
	229	if dr not in prs:
	230	prs.append(dr)
	231	this_file_prs.append(dr.split('/')[-1])
	232	elif dg_regex.search(line.value):
	233	# Found dg-warning/dg-error line
	234	break
	235
edf0c3ff TB	236	# PR number in the file name
	237	fname = os.path.basename(file.path)
	238	m = pr_filename_regex.search(fname)
	239	if m:
	240	pr = m.group('pr')
	241	pr2 = 'PR ' + pr
	242	if pr not in this_file_prs and pr2 not in prs:
	243	prs.append(pr2)
00243d9a ML	244
	245	if fill_pr_titles:
	246	out += get_pr_titles(prs)
	247
fef084dc ML	248	# print list of PR entries before ChangeLog entries
	249	if prs:
	250	if not out:
	251	out += '\n'
	252	for pr in prs:
	253	out += '\t%s\n' % pr
	254	out += '\n'
	255
00243d9a ML	256	# sort ChangeLog so that 'testsuite' is at the end
	257	for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
	258	files = changelogs[changelog]
	259	out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
	260	out += '\n'
00243d9a ML	261	# new and deleted files should be at the end
	262	for file in sorted(files, key=sort_changelog_files):
	263	assert file.path.startswith(changelog)
	264	in_tests = 'testsuite' in changelog or 'testsuite' in file.path
e370a248	265	relative_path = get_rel_path_if_prefixed(file.path, changelog)
00243d9a ML	266	functions = []
00243d9a ML	267	if file.is_added_file:
b838641b ML	268	msg = 'New test.' if in_tests else 'New file.'
b838641b ML	269	out = append_changelog_line(out, relative_path, msg)
00243d9a	270	elif file.is_removed_file:
b838641b	271	out = append_changelog_line(out, relative_path, 'Removed.')
eb78da45	272	elif hasattr(file, 'is_rename') and file.is_rename:
eb78da45 ML	273	# A file can be theoretically moved to a location that
eb78da45 ML	274	# belongs to a different ChangeLog. Let user fix it.
e370a248 ML	275	#
	276	# Since unidiff 0.7.0, path.file == path.target_file[2:],
	277	# it used to be path.source_file[2:]
	278	relative_path = get_rel_path_if_prefixed(file.source_file[2:],
	279	changelog)
1e74ce89	280	out = append_changelog_line(out, relative_path, 'Move to...')
e370a248 ML	281	new_path = get_rel_path_if_prefixed(file.target_file[2:],
	282	changelog)
	283	out += f'\t* {new_path}: ...here.\n'
432b9f61 MF	284	elif os.path.basename(file.path) in generated_files:
432b9f61 MF	285	out += '\t* %s: Regenerate.\n' % (relative_path)
b838641b	286	append_changelog_line(out, relative_path, 'Regenerate.')
00243d9a ML	287	else:
	288	if not no_functions:
	289	for hunk in file:
	290	# Do not add function names for testsuite files
	291	extension = os.path.splitext(relative_path)[1]
	292	if not in_tests and extension in function_extensions:
	293	last_fn = None
	294	modified_visited = False
	295	success = False
	296	for line in hunk:
	297	m = identifier_regex.match(line.value)
	298	if line.is_added or line.is_removed:
a2d04f3d ML	299	# special-case definition in .md files
	300	m2 = md_def_regex.match(line.value)
	301	if extension == '.md' and m2:
	302	fn = m2.group(1)
	303	if fn not in functions:
	304	functions.append(fn)
	305	last_fn = None
	306	success = True
	307
00243d9a ML	308	if not line.value.strip():
	309	continue
	310	modified_visited = True
	311	if m and try_add_function(functions,
	312	m.group(1)):
	313	last_fn = None
	314	success = True
	315	elif line.is_context:
	316	if last_fn and modified_visited:
	317	try_add_function(functions, last_fn)
	318	last_fn = None
	319	modified_visited = False
	320	success = True
	321	elif m:
	322	last_fn = m.group(1)
	323	modified_visited = False
	324	if not success:
	325	try_add_function(functions,
	326	hunk.section_header)
	327	if functions:
	328	out += '\t* %s (%s):\n' % (relative_path, functions[0])
	329	for fn in functions[1:]:
	330	out += '\t(%s):\n' % fn
	331	else:
	332	out += '\t* %s:\n' % relative_path
	333	out += '\n'
	334	return out
	335
	336
cf76bbf8 ML	337	def update_copyright(data):
	338	current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
	339	username = subprocess.check_output('git config user.name', shell=True,
	340	encoding='utf8').strip()
	341	email = subprocess.check_output('git config user.email', shell=True,
	342	encoding='utf8').strip()
	343
	344	changelogs = set()
	345	diff = PatchSet(data)
	346
	347	for file in diff:
	348	changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
	349	if changelog not in changelogs:
	350	changelogs.add(changelog)
	351	with open(changelog) as f:
	352	content = f.read()
	353	with open(changelog, 'w+') as f:
	354	f.write(f'{current_timestamp} {username} <{email}>\n\n')
	355	f.write('\tUpdate copyright years.\n\n')
	356	f.write(content)
	357
	358
0684c8d3	359	def skip_line_in_changelog(line):
5f187974 MJ	360	return FIRST_LINE_OF_END_RE.match(line) is None
5f187974 MJ	361
0684c8d3	362
00243d9a	363	if __name__ == '__main__':
18ef76d3 ML	364	extra_args = os.getenv('GCC_MKLOG_ARGS')
	365	if extra_args:
	366	sys.argv += json.loads(extra_args)
	367
00243d9a ML	368	parser = argparse.ArgumentParser(description=help_message)
	369	parser.add_argument('input', nargs='?',
	370	help='Patch file (or missing, read standard input)')
edf0c3ff	371	parser.add_argument('-b', '--pr-numbers', action='store',
48b312b4	372	type=lambda arg: arg.split(','), nargs='?',
edf0c3ff	373	help='Add the specified PRs (comma separated)')
00243d9a ML	374	parser.add_argument('-s', '--no-functions', action='store_true',
	375	help='Do not generate function names in ChangeLogs')
	376	parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
	377	help='Download title of mentioned PRs')
e54da1b6	378	parser.add_argument('-d', '--directory',
c38f679e ML	379	help='Root directory where to search for ChangeLog '
c38f679e ML	380	'files')
757dbb59 JM	381	parser.add_argument('-c', '--changelog',
	382	help='Append the ChangeLog to a git commit message '
	383	'file')
cf76bbf8 ML	384	parser.add_argument('--update-copyright', action='store_true',
cf76bbf8 ML	385	help='Update copyright in ChangeLog files')
7f0700a6 LD	386	parser.add_argument('-a', '--append', action='store_true',
7f0700a6 LD	387	help='Append the generate ChangeLog to the patch file')
00243d9a ML	388	args = parser.parse_args()
	389	if args.input == '-':
	390	args.input = None
e54da1b6 ML	391	if args.directory:
e54da1b6 ML	392	root = args.directory
00243d9a	393
b0451799	394	data = open(args.input, newline='\n') if args.input else sys.stdin
cf76bbf8 ML	395	if args.update_copyright:
cf76bbf8 ML	396	update_copyright(data)
757dbb59	397	else:
cf76bbf8	398	output = generate_changelog(data, args.no_functions,
edf0c3ff	399	args.fill_up_bug_titles, args.pr_numbers)
7f0700a6 LD	400	if args.append:
7f0700a6 LD	401	if (not args.input):
ebffc840 LD	402	raise Exception("`-a or --append` option not support standard "
ebffc840 LD	403	"input")
7f0700a6 LD	404	lines = []
	405	with open(args.input, 'r', newline='\n') as f:
	406	# 1 -> not find the possible start of diff log
	407	# 2 -> find the possible start of diff log
	408	# 3 -> finish add ChangeLog to the patch file
	409	maybe_diff_log = 1
	410	for line in f:
	411	if maybe_diff_log == 1 and line == "---\n":
	412	maybe_diff_log = 2
ebffc840 LD	413	elif (maybe_diff_log == 2 and
ebffc840 LD	414	re.match(r"\s[^\s]+\s+\\|\s+\d+\s[+\-]+\n", line)):
7f0700a6 LD	415	lines += [output, "---\n", line]
	416	maybe_diff_log = 3
	417	else:
	418	# the possible start is not the true start.
	419	if maybe_diff_log == 2:
ebffc840	420	lines.append("---\n")
7f0700a6 LD	421	maybe_diff_log = 1
	422	lines.append(line)
	423	with open(args.input, "w") as f:
	424	f.writelines(lines)
	425	elif args.changelog:
cf76bbf8	426	lines = open(args.changelog).read().split('\n')
0684c8d3	427	start = list(takewhile(skip_line_in_changelog, lines))
cf76bbf8 ML	428	end = lines[len(start):]
cf76bbf8 ML	429	with open(args.changelog, 'w') as f:
58e3b17f	430	if not start or not start[0]:
03c0b064 ML	431	if len(prs) == 1:
	432	# initial commit subject line 'component: [PRnnnnn]'
	433	m = prnum_regex.match(prs[0])
	434	if m:
	435	title = f'{m.group("comp")}: [PR{m.group("num")}]'
	436	start.insert(0, title)
cf76bbf8	437	if start:
58e3b17f	438	# append empty line
cf76bbf8 ML	439	if start[-1] != '':
	440	start.append('')
	441	else:
	442	# append 2 empty lines
	443	start = 2 * ['']
	444	f.write('\n'.join(start))
	445	f.write('\n')
	446	f.write(output)
	447	f.write('\n'.join(end))
	448	else:
	449	print(output, end='')