Avoid assuming maximum string length is constant [PR102960].

[thirdparty/gcc.git] / contrib / mklog.py
diff --git a/contrib/mklog.py b/contrib/mklog.py

index 45559afbe6b5f2ba03cd2a85d3b969febfdcf32d..cd5ef0bcc74b90a86aa68752fd827a49aa854fe4 100755 (executable)
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -27,17 +27,26 @@
  # Author: Martin Liska <mliska@suse.cz>
  
  import argparse
+import datetime
  import os
  import re
+import subprocess
  import sys
-
-import bs4
+from itertools import takewhile
  
  import requests
  
  from unidiff import PatchSet
  
+LINE_LIMIT = 100
+TAB_WIDTH = 8
+CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
+
  pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
+prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
+dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
+dg_regex = re.compile(r'{\s+dg-(error|warning)')
+pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
  identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
  comment_regex = re.compile(r'^\/\*')
  struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
@@ -46,8 +55,14 @@ macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
  super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
  fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
  template_and_param_regex = re.compile(r'<[^<>]*>')
+md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
+bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
+               'include_fields=summary,component'
  
-function_extensions = set(['.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def'])
+function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
+
+# NB: Makefile.in isn't listed as it's not always generated.
+generated_files = {'aclocal.m4', 'config.h.in', 'configure'}
  
  help_message = """\
  Generate ChangeLog template for PATCH.
@@ -56,13 +71,15 @@ PATCH must be generated using diff(1)'s -up or -cp options
  """
  
  script_folder = os.path.realpath(__file__)
-gcc_root = os.path.dirname(os.path.dirname(script_folder))
+root = os.path.dirname(os.path.dirname(script_folder))
+
+firstpr = ''
  
  
  def find_changelog(path):
      folder = os.path.split(path)[0]
      while True:
-        if os.path.exists(os.path.join(gcc_root, folder, 'ChangeLog')):
+        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
              return folder
          folder = os.path.dirname(folder)
          if folder == '':
@@ -106,29 +123,57 @@ def sort_changelog_files(changed_file):
  
  
  def get_pr_titles(prs):
-    if not prs:
-        return ''
-
-    output = ''
-    for pr in prs:
-        id = pr.split('/')[-1]
-        r = requests.get('https://gcc.gnu.org/PR%s' % id)
-        html = bs4.BeautifulSoup(r.text, features='lxml')
-        title = html.title.text
-        title = title[title.find('–') + 1:].strip()
-        output += '%s - %s\n' % (pr, title)
-    output += '\n'
-    return output
-
-
-def generate_changelog(data, no_functions=False, fill_pr_titles=False):
+    output = []
+    for idx, pr in enumerate(prs):
+        pr_id = pr.split('/')[-1]
+        r = requests.get(bugzilla_url % pr_id)
+        bugs = r.json()['bugs']
+        if len(bugs) == 1:
+            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
+            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
+            if out not in output:
+                output.append(out)
+    if output:
+        output.append('')
+    return '\n'.join(output)
+
+
+def append_changelog_line(out, relative_path, text):
+    line = f'\t* {relative_path}:'
+    if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
+        out += f'{line} {text}\n'
+    else:
+        out += f'{line}\n'
+        out += f'\t{text}\n'
+    return out
+
+
+def get_rel_path_if_prefixed(path, folder):
+    if path.startswith(folder):
+        return path[len(folder):].lstrip('/')
+    else:
+        return path
+
+
+def generate_changelog(data, no_functions=False, fill_pr_titles=False,
+                       additional_prs=None):
      changelogs = {}
      changelog_list = []
      prs = []
      out = ''
      diff = PatchSet(data)
+    global firstpr
  
+    if additional_prs:
+        for apr in additional_prs:
+            if not apr.startswith('PR ') and '/' in apr:
+                apr = 'PR ' + apr
+            if apr not in prs:
+                prs.append(apr)
      for file in diff:
+        # skip files that can't be parsed
+        if file.path == '/dev/null':
+            continue
          changelog = find_changelog(file.path)
          if changelog not in changelogs:
              changelogs[changelog] = []
@@ -137,36 +182,81 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
  
          # Extract PR entries from newly added tests
          if 'testsuite' in file.path and file.is_added_file:
-            for line in list(file)[0]:
+            # Only search first ten lines as later lines may
+            # contains commented code which a note that it
+            # has not been tested due to a certain PR or DR.
+            this_file_prs = []
+            for line in list(file)[0][0:10]:
                  m = pr_regex.search(line.value)
                  if m:
                      pr = m.group('pr')
                      if pr not in prs:
                          prs.append(pr)
+                        this_file_prs.append(pr.split('/')[-1])
                  else:
-                    break
+                    m = dr_regex.search(line.value)
+                    if m:
+                        dr = m.group('dr')
+                        if dr not in prs:
+                            prs.append(dr)
+                            this_file_prs.append(dr.split('/')[-1])
+                    elif dg_regex.search(line.value):
+                        # Found dg-warning/dg-error line
+                        break
+            # PR number in the file name
+            fname = os.path.basename(file.path)
+            m = pr_filename_regex.search(fname)
+            if m:
+                pr = m.group('pr')
+                pr2 = 'PR ' + pr
+                if pr not in this_file_prs and pr2 not in prs:
+                    prs.append(pr2)
+
+    if prs:
+        firstpr = prs[0]
  
      if fill_pr_titles:
          out += get_pr_titles(prs)
  
+    # print list of PR entries before ChangeLog entries
+    if prs:
+        if not out:
+            out += '\n'
+        for pr in prs:
+            out += '\t%s\n' % pr
+        out += '\n'
+
      # sort ChangeLog so that 'testsuite' is at the end
      for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
          files = changelogs[changelog]
          out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
          out += '\n'
-        for pr in prs:
-            out += '\t%s\n' % pr
          # new and deleted files should be at the end
          for file in sorted(files, key=sort_changelog_files):
              assert file.path.startswith(changelog)
              in_tests = 'testsuite' in changelog or 'testsuite' in file.path
-            relative_path = file.path[len(changelog):].lstrip('/')
+            relative_path = get_rel_path_if_prefixed(file.path, changelog)
              functions = []
              if file.is_added_file:
-                msg = 'New test' if in_tests else 'New file'
-                out += '\t* %s: %s.\n' % (relative_path, msg)
+                msg = 'New test.' if in_tests else 'New file.'
+                out = append_changelog_line(out, relative_path, msg)
              elif file.is_removed_file:
-                out += '\t* %s: Removed.\n' % (relative_path)
+                out = append_changelog_line(out, relative_path, 'Removed.')
+            elif hasattr(file, 'is_rename') and file.is_rename:
+                # A file can be theoretically moved to a location that
+                # belongs to a different ChangeLog.  Let user fix it.
+                #
+                # Since unidiff 0.7.0, path.file == path.target_file[2:],
+                # it used to be path.source_file[2:]
+                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
+                                                         changelog)
+                out = append_changelog_line(out, relative_path, 'Moved to...')
+                new_path = get_rel_path_if_prefixed(file.target_file[2:],
+                                                    changelog)
+                out += f'\t* {new_path}: ...here.\n'
+            elif os.path.basename(file.path) in generated_files:
+                out += '\t* %s: Regenerate.\n' % (relative_path)
+                append_changelog_line(out, relative_path, 'Regenerate.')
              else:
                  if not no_functions:
                      for hunk in file:
@@ -179,6 +269,15 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
                              for line in hunk:
                                  m = identifier_regex.match(line.value)
                                  if line.is_added or line.is_removed:
+                                    # special-case definition in .md files
+                                    m2 = md_def_regex.match(line.value)
+                                    if extension == '.md' and m2:
+                                        fn = m2.group(1)
+                                        if fn not in functions:
+                                            functions.append(fn)
+                                            last_fn = None
+                                            success = True
+
                                      if not line.value.strip():
                                          continue
                                      modified_visited = True
@@ -208,20 +307,86 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
      return out
  
  
+def update_copyright(data):
+    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
+    username = subprocess.check_output('git config user.name', shell=True,
+                                       encoding='utf8').strip()
+    email = subprocess.check_output('git config user.email', shell=True,
+                                    encoding='utf8').strip()
+
+    changelogs = set()
+    diff = PatchSet(data)
+
+    for file in diff:
+        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
+        if changelog not in changelogs:
+            changelogs.add(changelog)
+            with open(changelog) as f:
+                content = f.read()
+            with open(changelog, 'w+') as f:
+                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
+                f.write('\tUpdate copyright years.\n\n')
+                f.write(content)
+
+
+def skip_line_in_changelog(line):
+    if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
+        return False
+    return True
+
+
  if __name__ == '__main__':
      parser = argparse.ArgumentParser(description=help_message)
      parser.add_argument('input', nargs='?',
                          help='Patch file (or missing, read standard input)')
+    parser.add_argument('-b', '--pr-numbers', action='store',
+                        type=lambda arg: arg.split(','), nargs='?',
+                        help='Add the specified PRs (comma separated)')
      parser.add_argument('-s', '--no-functions', action='store_true',
                          help='Do not generate function names in ChangeLogs')
      parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                          help='Download title of mentioned PRs')
+    parser.add_argument('-d', '--directory',
+                        help='Root directory where to search for ChangeLog '
+                        'files')
+    parser.add_argument('-c', '--changelog',
+                        help='Append the ChangeLog to a git commit message '
+                             'file')
+    parser.add_argument('--update-copyright', action='store_true',
+                        help='Update copyright in ChangeLog files')
      args = parser.parse_args()
      if args.input == '-':
          args.input = None
+    if args.directory:
+        root = args.directory
  
-    input = open(args.input) if args.input else sys.stdin
-    data = input.read()
-    output = generate_changelog(data, args.no_functions,
-                                args.fill_up_bug_titles)
-    print(output, end='')
+    data = open(args.input) if args.input else sys.stdin
+    if args.update_copyright:
+        update_copyright(data)
+    else:
+        output = generate_changelog(data, args.no_functions,
+                                    args.fill_up_bug_titles, args.pr_numbers)
+        if args.changelog:
+            lines = open(args.changelog).read().split('\n')
+            start = list(takewhile(skip_line_in_changelog, lines))
+            end = lines[len(start):]
+            with open(args.changelog, 'w') as f:
+                if not start or not start[0]:
+                    # initial commit subject line 'component: [PRnnnnn]'
+                    m = prnum_regex.match(firstpr)
+                    if m:
+                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
+                        start.insert(0, title)
+                if start:
+                    # append empty line
+                    if start[-1] != '':
+                        start.append('')
+                else:
+                    # append 2 empty lines
+                    start = 2 * ['']
+                f.write('\n'.join(start))
+                f.write('\n')
+                f.write(output)
+                f.write('\n'.join(end))
+        else:
+            print(output, end='')