contrib/gcc-changelog/git_commit.py

   1 #!/usr/bin/env python3
   2
   3 # Copyright (C) 2020 Free Software Foundation, Inc.
   4 #
   5 # This file is part of GCC.
   6 #
   7 # GCC is free software; you can redistribute it and/or modify it under
   8 # the terms of the GNU General Public License as published by the Free
   9 # Software Foundation; either version 3, or (at your option) any later
  10 # version.
  11 #
  12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 # for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.
  20
  21 import difflib
  22 import os
  23 import re
  24 import sys
  25 from collections import defaultdict
  26
# Directories that own a ChangeLog file.  GitCommit.init_changelog_locations()
# copies this set into self.changelog_locations, which is then used to
# recognise location lines and to map a file path to its ChangeLog.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/m2',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/rust',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgm2',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names accepted in "PR component/NNNN" references, both in the
# subject line and in ChangeLog PR lines; any other component is reported as
# an invalid PR component.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'rust',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Path prefixes tested by GitCommit.in_ignored_location().  Changed files
# below one of these prefixes are not reported when no ChangeLog entry
# mentions them.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Directory prefixes consulted by GitCommit.check_file_patterns() for
# ChangeLog file patterns, i.e. file names written with a trailing '*'.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that GitCommit.__init__ treats like ChangeLog files: a commit may
# consist only of such files, but must not mix them with other changes.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
 161
# First author line of an entry: "YYYY-MM-DD", two spaces, "Name  <email>".
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Additional author line: a tab, optional spaces (captured so that their
# number can be validated by the caller) and "Name  <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# Explicit ChangeLog header, optionally introduced by "for"/"For"; group 1 is
# the directory part that precedes "ChangeLog".
# NOTE(review): inside the character class "+-/" is a range from '+' to '/'
# (it also admits ',' and '.'), not the three literal characters.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" anywhere in the subject line.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PRnnnn]" or "(PRnnnn)" in the subject line (no component).
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# ChangeLog PR line: a tab, "PR ", an optional "component/" and the number.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# ChangeLog DR line: a tab, "DR " and the number.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Item line: a tab, an asterisk, the spaces that follow it and the content.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the list of file names in an item.
end_of_location_regex = re.compile(r'[\[<(:]')
# Item whose "(name):" part is followed by nothing but whitespace.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# Line that starts a new item ("\t*") or a new "(name):" group.
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# Line identifying the commit being reverted (matched with fullmatch()).
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# Marker identifying the original commit of a cherry pick.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
 176
# Maximum width of a ChangeLog line, measured with every tab expanded to
# TAB_WIDTH spaces.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Compared against the lower-cased line, hence written in lower case.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lines starting with one of these prefixes (compared against the lower-cased
# line) are skipped while parsing the ChangeLog part of a message.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# NOTE(review): not referenced in the part of the file reviewed here;
# presumably the strftime format of entry timestamps -- confirm at use site.
DATE_FORMAT = '%Y-%m-%d'
 185
 186
 187 def decode_path(path):
 188     # When core.quotepath is true (default value), utf8 chars are encoded like:
 189     # "b/ko\304\215ka.txt"
 190     #
 191     # The upstream bug is fixed:
 192     # https://github.com/gitpython-developers/GitPython/issues/1099
 193     #
 194     # but we still need a workaround for older versions of the library.
 195     # Please take a look at the explanation of the transformation:
 196     # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
 197
 198     if path.startswith('"') and path.endswith('"'):
 199         return (path.strip('"').encode('utf8').decode('unicode-escape')
 200                 .encode('latin-1').decode('utf8'))
 201     else:
 202         return path
 203
 204
 205 class Error:
 206     def __init__(self, message, line=None, details=None):
 207         self.message = message
 208         self.line = line
 209         self.details = details
 210
 211     def __repr__(self):
 212         s = self.message
 213         if self.line:
 214             s += ': "%s"' % self.line
 215         return s
 216
 217
 218 class ChangeLogEntry:
 219     def __init__(self, folder, authors, prs):
 220         self.folder = folder
 221         # The 'list.copy()' function is not available before Python 3.3
 222         self.author_lines = list(authors)
 223         self.initial_prs = list(prs)
 224         self.prs = list(prs)
 225         self.lines = []
 226         self.files = []
 227         self.file_patterns = []
 228         self.parentheses_stack = []
 229
 230     def parse_file_names(self):
 231         # Whether the content currently processed is between a star prefix the
 232         # end of the file list: a colon or an open paren.
 233         in_location = False
 234
 235         for line in self.lines:
 236             # If this line matches the star prefix, start the location
 237             # processing on the information that follows the star.
 238             # Note that we need to skip macro names that can be in form of:
 239             #
 240             # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
 241             # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
 242             # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
 243             #
 244             m = star_prefix_regex.match(line)
 245             if m and len(m.group('spaces')) == 1:
 246                 in_location = True
 247                 line = m.group('content')
 248
 249             if in_location:
 250                 # Strip everything that is not a filename in "line":
 251                 # entities "(NAME)", cases "<PATTERN>", conditions
 252                 # "[COND]", entry text (the colon, if present, and
 253                 # anything that follows it).
 254                 m = end_of_location_regex.search(line)
 255                 if m:
 256                     line = line[:m.start()]
 257                     in_location = False
 258
 259                 # At this point, all that's left is a list of filenames
 260                 # separated by commas and whitespaces.
 261                 for file in line.split(','):
 262                     file = file.strip()
 263                     if file:
 264                         if file.endswith('*'):
 265                             self.file_patterns.append(file[:-1])
 266                         else:
 267                             self.files.append(file)
 268
 269     @property
 270     def datetime(self):
 271         for author in self.author_lines:
 272             if author[1]:
 273                 return author[1]
 274         return None
 275
 276     @property
 277     def authors(self):
 278         return [author_line[0] for author_line in self.author_lines]
 279
 280     @property
 281     def is_empty(self):
 282         return not self.lines and self.prs == self.initial_prs
 283
 284     def contains_author(self, author):
 285         for author_lines in self.author_lines:
 286             if author_lines[0] == author:
 287                 return True
 288         return False
 289
 290
 291 class GitInfo:
 292     def __init__(self, hexsha, date, author, lines, modified_files):
 293         self.hexsha = hexsha
 294         self.date = date
 295         self.author = author
 296         self.lines = lines
 297         self.modified_files = modified_files
 298
 299
 300 class GitCommit:
 301     def __init__(self, info, commit_to_info_hook=None, ref_name=None):
 302         self.original_info = info
 303         self.info = info
 304         self.message = None
 305         self.changes = None
 306         self.changelog_entries = []
 307         self.errors = []
 308         self.warnings = []
 309         self.top_level_authors = []
 310         self.co_authors = []
 311         self.top_level_prs = []
 312         self.subject_prs = set()
 313         self.cherry_pick_commit = None
 314         self.revert_commit = None
 315         self.commit_to_info_hook = commit_to_info_hook
 316         self.init_changelog_locations(ref_name)
 317
 318         # Skip Update copyright years commits
 319         if self.info.lines and self.info.lines[0] == 'Update copyright years.':
 320             return
 321
 322         if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
 323             self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))
 324
 325         # Identify first if the commit is a Revert commit
 326         for line in self.info.lines:
 327             m = revert_regex.fullmatch(line)
 328             if m:
 329                 self.revert_commit = m.group('hash')
 330                 break
 331         if self.revert_commit:
 332             self.info = self.commit_to_info_hook(self.revert_commit)
 333
 334         # The following happens for get_email.py:
 335         if not self.info:
 336             return
 337
 338         self.check_commit_email()
 339
 340         # Extract PR numbers form the subject line
 341         # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
 342         if self.info.lines and not self.revert_commit:
 343             self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
 344             for m in subject_pr_regex.finditer(info.lines[0]):
 345                 if not m.group('component') in bug_components:
 346                     self.errors.append(Error('invalid PR component in subject', info.lines[0]))
 347                 self.subject_prs.add(m.group('pr'))
 348
 349         # Allow complete deletion of ChangeLog files in a commit
 350         project_files = [f for f in self.info.modified_files
 351                          if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
 352                          or f[0] in misc_files]
 353         ignored_files = [f for f in self.info.modified_files
 354                          if self.in_ignored_location(f[0])]
 355         if len(project_files) == len(self.info.modified_files):
 356             # All modified files are only MISC files
 357             return
 358         elif project_files:
 359             err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
 360                   'should be done separately from normal commits\n' \
 361                   '(note: ChangeLog entries will be automatically ' \
 362                   'added by a cron job)'
 363             self.errors.append(Error(err))
 364             return
 365
 366         all_are_ignored = (len(project_files) + len(ignored_files)
 367                            == len(self.info.modified_files))
 368         self.parse_lines(all_are_ignored)
 369         if self.changes:
 370             self.parse_changelog()
 371             self.parse_file_names()
 372             self.check_for_empty_description()
 373             self.check_for_broken_parentheses()
 374             self.deduce_changelog_locations()
 375             self.check_file_patterns()
 376             self.check_line_start()
 377             if not self.errors:
 378                 self.check_mentioned_files()
 379                 self.check_for_correct_changelog()
 380         if self.subject_prs:
 381             self.errors.append(Error('PR %s in subject but not in changelog' %
 382                                      ', '.join(self.subject_prs), self.info.lines[0]))
 383
 384     @property
 385     def success(self):
 386         return not self.errors
 387
 388     @property
 389     def new_files(self):
 390         return [x[0] for x in self.info.modified_files if x[1] == 'A']
 391
 392     @classmethod
 393     def is_changelog_filename(cls, path, allow_suffix=False):
 394         basename = os.path.basename(path)
 395         if basename == 'ChangeLog':
 396             return True
 397         elif allow_suffix and basename.startswith('ChangeLog'):
 398             return True
 399         else:
 400             return False
 401
 402     def find_changelog_location(self, name):
 403         if name.startswith('\t'):
 404             name = name[1:]
 405         if name.endswith(':'):
 406             name = name[:-1]
 407         if name.endswith('/'):
 408             name = name[:-1]
 409         return name if name in self.changelog_locations else None
 410
 411     @classmethod
 412     def format_git_author(cls, author):
 413         assert '<' in author
 414         return author.replace('<', ' <')
 415
 416     @classmethod
 417     def parse_git_name_status(cls, string):
 418         modified_files = []
 419         for entry in string.split('\n'):
 420             parts = entry.split('\t')
 421             t = parts[0]
 422             if t == 'A' or t == 'D' or t == 'M':
 423                 modified_files.append((parts[1], t))
 424             elif t.startswith('R'):
 425                 modified_files.append((parts[1], 'D'))
 426                 modified_files.append((parts[2], 'A'))
 427         return modified_files
 428
 429     def init_changelog_locations(self, ref_name):
 430         self.changelog_locations = list(default_changelog_locations)
 431         if ref_name:
 432             version = sys.maxsize
 433             if 'releases/gcc-' in ref_name:
 434                 version = int(ref_name.split('-')[-1])
 435             if version >= 12:
 436                 # HSA and BRIG were removed in GCC 12
 437                 self.changelog_locations.remove('gcc/brig')
 438                 self.changelog_locations.remove('libhsail-rt')
 439
 440     def parse_lines(self, all_are_ignored):
 441         body = self.info.lines
 442
 443         for i, b in enumerate(body):
 444             if not b:
 445                 continue
 446             if (changelog_regex.match(b) or self.find_changelog_location(b)
 447                     or star_prefix_regex.match(b) or pr_regex.match(b)
 448                     or dr_regex.match(b) or author_line_regex.match(b)
 449                     or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
 450                 self.changes = body[i:]
 451                 return
 452         if not all_are_ignored:
 453             self.errors.append(Error('cannot find a ChangeLog location in '
 454                                      'message'))
 455
    def parse_changelog(self):
        """Turn the lines of self.changes into ChangeLogEntry objects.

        Entries are appended to self.changelog_entries.  Authors and PR lines
        seen before any entry go to self.top_level_authors and
        self.top_level_prs; co-authors go to self.co_authors and the hash of
        a cherry pick line to self.cherry_pick_commit.  PRs found here are
        removed from self.subject_prs.  Format problems are appended to
        self.errors.
        """
        # Entry that currently receives lines, or None when the next content
        # line has to open (or belong to) a new one.
        last_entry = None
        # Set once an entry without an explicit location has been created;
        # from then on an empty line closes the current entry.
        will_deduce = False
        for line in self.changes:
            if not line:
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            # An explicit "path/ChangeLog:" header or a bare location line
            # opens a new entry for that folder.
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: at most one of author_tuple and pr_line
                # is set below.
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        # component still carries its trailing '/'.
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR is covered by the ChangeLog, so it is no longer
                    # "only in the subject".
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailer lines are recorded or skipped; they never become
                # part of an entry.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line is content of last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # A star not followed by exactly one space is only
                        # accepted inside an open parenthesis.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Look for empty groups in the text before
                                # the first colon.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
 585
 586     def process_parentheses(self, last_entry, line):
 587         for c in line:
 588             if c == '(':
 589                 last_entry.parentheses_stack.append(line)
 590             elif c == ')':
 591                 if not last_entry.parentheses_stack:
 592                     msg = 'bad wrapping of parenthesis'
 593                     self.errors.append(Error(msg, line))
 594                 else:
 595                     del last_entry.parentheses_stack[-1]
 596
 597     def parse_file_names(self):
 598         for entry in self.changelog_entries:
 599             entry.parse_file_names()
 600
 601     def check_file_patterns(self):
 602         for entry in self.changelog_entries:
 603             for pattern in entry.file_patterns:
 604                 name = os.path.join(entry.folder, pattern)
 605                 if not [name.startswith(pr) for pr in wildcard_prefixes]:
 606                     msg = 'unsupported wildcard prefix'
 607                     self.errors.append(Error(msg, name))
 608
 609     def check_for_empty_description(self):
 610         for entry in self.changelog_entries:
 611             for i, line in enumerate(entry.lines):
 612                 if (item_empty_regex.match(line) and
 613                     (i == len(entry.lines) - 1
 614                      or not entry.lines[i+1].strip()
 615                      or item_parenthesis_regex.match(entry.lines[i+1]))):
 616                     msg = 'missing description of a change'
 617                     self.errors.append(Error(msg, line))
 618
 619     def check_for_broken_parentheses(self):
 620         for entry in self.changelog_entries:
 621             if entry.parentheses_stack:
 622                 msg = 'bad parentheses wrapping'
 623                 self.errors.append(Error(msg, entry.parentheses_stack[-1]))
 624
 625     def check_line_start(self):
 626         for entry in self.changelog_entries:
 627             for line in entry.lines:
 628                 if line.startswith('\t '):
 629                     msg = 'extra space after tab'
 630                     self.errors.append(Error(msg, line))
 631
 632     def get_file_changelog_location(self, changelog_file):
 633         for file in self.info.modified_files:
 634             if file[0] == changelog_file:
 635                 # root ChangeLog file
 636                 return ''
 637             index = file[0].find('/' + changelog_file)
 638             if index != -1:
 639                 return file[0][:index]
 640         return None
 641
 642     def deduce_changelog_locations(self):
 643         for entry in self.changelog_entries:
 644             if entry.folder is None:
 645                 changelog = None
 646                 for file in entry.files:
 647                     location = self.get_file_changelog_location(file)
 648                     if (location == ''
 649                        or (location and location in self.changelog_locations)):
 650                         if changelog and changelog != location:
 651                             msg = 'could not deduce ChangeLog file, ' \
 652                                   'not unique location'
 653                             self.errors.append(Error(msg))
 654                             return
 655                         changelog = location
 656                 if changelog is not None:
 657                     entry.folder = changelog
 658                 else:
 659                     msg = 'could not deduce ChangeLog file'
 660                     self.errors.append(Error(msg))
 661
 662     @classmethod
 663     def in_ignored_location(cls, path):
 664         for ignored in ignored_prefixes:
 665             if path.startswith(ignored):
 666                 return True
 667         return False
 668
 669     def get_changelog_by_path(self, path):
 670         components = path.split('/')
 671         while components:
 672             if '/'.join(components) in self.changelog_locations:
 673                 break
 674             components = components[:-1]
 675         return '/'.join(components)
 676
 677     def check_mentioned_files(self):
 678         folder_count = len([x.folder for x in self.changelog_entries])
 679         assert folder_count == len(self.changelog_entries)
 680
 681         mentioned_files = set()
 682         mentioned_patterns = []
 683         used_patterns = set()
 684         for entry in self.changelog_entries:
 685             if not entry.files and not entry.file_patterns:
 686                 msg = 'no files mentioned for ChangeLog in directory'
 687                 self.errors.append(Error(msg, entry.folder))
 688             assert not entry.folder.endswith('/')
 689             for file in entry.files:
 690                 if not self.is_changelog_filename(file):
 691                     item = os.path.join(entry.folder, file)
 692                     if item in mentioned_files:
 693                         msg = 'same file specified multiple times'
 694                         self.errors.append(Error(msg, file))
 695                     else:
 696                         mentioned_files.add(item)
 697             for pattern in entry.file_patterns:
 698                 mentioned_patterns.append(os.path.join(entry.folder, pattern))
 699
 700         cand = [x[0] for x in self.info.modified_files
 701                 if not self.is_changelog_filename(x[0])]
 702         changed_files = set(cand)
 703         for file in sorted(mentioned_files - changed_files):
 704             msg = 'unchanged file mentioned in a ChangeLog'
 705             candidates = difflib.get_close_matches(file, changed_files, 1)
 706             details = None
 707             if candidates:
 708                 msg += f' (did you mean "{candidates[0]}"?)'
 709                 details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
 710             self.errors.append(Error(msg, file, details))
 711         auto_add_warnings = defaultdict(list)
 712         for file in sorted(changed_files - mentioned_files):
 713             if not self.in_ignored_location(file):
 714                 if file in self.new_files:
 715                     changelog_location = self.get_changelog_by_path(file)
 716                     # Python2: we cannot use next(filter(...))
 717                     entries = filter(lambda x: x.folder == changelog_location,
 718                                      self.changelog_entries)
 719                     entries = list(entries)
 720                     entry = entries[0] if entries else None
 721                     if not entry:
 722                         prs = self.top_level_prs
 723                         if not prs:
 724                             # if all ChangeLog entries have identical PRs
 725                             # then use them
 726                             if self.changelog_entries:
 727                                 prs = self.changelog_entries[0].prs
 728                                 for entry in self.changelog_entries:
 729                                     if entry.prs != prs:
 730                                         prs = []
 731                                         break
 732                         entry = ChangeLogEntry(changelog_location,
 733                                                self.top_level_authors,
 734                                                prs)
 735                         self.changelog_entries.append(entry)
 736                     # strip prefix of the file
 737                     assert file.startswith(entry.folder)
 738                     # do not allow auto-addition of New files
 739                     # for the top-level folder
 740                     if entry.folder:
 741                         file = file[len(entry.folder):].lstrip('/')
 742                         entry.lines.append('\t* %s: New file.' % file)
 743                         entry.files.append(file)
 744                         auto_add_warnings[entry.folder].append(file)
 745                     else:
 746                         msg = 'new file in the top-level folder not mentioned in a ChangeLog'
 747                         self.errors.append(Error(msg, file))
 748                 else:
 749                     used_pattern = [p for p in mentioned_patterns
 750                                     if file.startswith(p)]
 751                     used_pattern = used_pattern[0] if used_pattern else None
 752                     if used_pattern:
 753                         used_patterns.add(used_pattern)
 754                     else:
 755                         msg = 'changed file not mentioned in a ChangeLog'
 756                         self.errors.append(Error(msg, file))
 757
 758         for pattern in mentioned_patterns:
 759             if pattern not in used_patterns:
 760                 error = "pattern doesn't match any changed files"
 761                 self.errors.append(Error(error, pattern))
 762         for entry, val in auto_add_warnings.items():
 763             if len(val) == 1:
 764                 self.warnings.append(f"Auto-added new file '{entry}/{val[0]}'")
 765             else:
 766                 self.warnings.append(f"Auto-added {len(val)} new files in '{entry}'")
 767
 768     def check_for_correct_changelog(self):
 769         for entry in self.changelog_entries:
 770             for file in entry.files:
 771                 full_path = os.path.join(entry.folder, file)
 772                 changelog_location = self.get_changelog_by_path(full_path)
 773                 if changelog_location != entry.folder:
 774                     msg = 'wrong ChangeLog location "%s", should be "%s"'
 775                     err = Error(msg % (entry.folder, changelog_location), file)
 776                     self.errors.append(err)
 777
 778     @classmethod
 779     def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
 780         output = ''
 781         for i, author in enumerate(authors):
 782             if i == 0:
 783                 output += '%s%s  %s\n' % (prefix, timestamp, author)
 784             else:
 785                 output += '%s\t    %s\n' % (prefix, author)
 786         output += '\n'
 787         return output
 788
 789     def to_changelog_entries(self, use_commit_ts=False):
 790         current_timestamp = self.info.date.strftime(DATE_FORMAT)
 791         for entry in self.changelog_entries:
 792             output = ''
 793             timestamp = entry.datetime
 794             if self.revert_commit:
 795                 timestamp = current_timestamp
 796                 orig_date = self.original_info.date
 797                 current_timestamp = orig_date.strftime(DATE_FORMAT)
 798             elif self.cherry_pick_commit:
 799                 info = self.commit_to_info_hook(self.cherry_pick_commit)
 800                 # it can happen that it is a cherry-pick for a different
 801                 # repository
 802                 if info:
 803                     timestamp = info.date.strftime(DATE_FORMAT)
 804                 else:
 805                     timestamp = current_timestamp
 806             elif not timestamp or use_commit_ts:
 807                 timestamp = current_timestamp
 808             authors = entry.authors if entry.authors else [self.info.author]
 809             # add Co-Authored-By authors to all ChangeLog entries
 810             for author in self.co_authors:
 811                 if author not in authors:
 812                     authors.append(author)
 813
 814             if self.cherry_pick_commit or self.revert_commit:
 815                 original_author = self.original_info.author
 816                 output += self.format_authors_in_changelog([original_author],
 817                                                            current_timestamp)
 818                 if self.revert_commit:
 819                     output += '\tRevert:\n'
 820                 else:
 821                     output += '\tBackported from master:\n'
 822                 output += self.format_authors_in_changelog(authors,
 823                                                            timestamp, '\t')
 824             else:
 825                 output += self.format_authors_in_changelog(authors, timestamp)
 826             for pr in entry.prs:
 827                 output += '\t%s\n' % pr
 828             for line in entry.lines:
 829                 output += line + '\n'
 830             yield (entry.folder, output.rstrip())
 831
 832     def print_output(self):
 833         for entry, output in self.to_changelog_entries():
 834             print('------ %s/ChangeLog ------ ' % entry)
 835             print(output)
 836
 837     def print_errors(self):
 838         print('Errors:')
 839         for error in self.errors:
 840             print(error)
 841
 842     def print_warnings(self):
 843         if self.warnings:
 844             print('Warnings:')
 845             for warning in self.warnings:
 846                 print(warning)
 847
 848     def check_commit_email(self):
 849         # Parse 'Martin Liska  <mliska@suse.cz>'
 850         email = self.info.author.split(' ')[-1].strip('<>')
 851
 852         # Verify that all characters are ASCII
 853         # TODO: Python 3.7 provides a nicer function: isascii
 854         if len(email) != len(email.encode()):
 855             self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))