3 # Copyright (C) 2020 Free Software Foundation, Inc.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify it under
8 # the terms of the GNU General Public License as published by the Free
9 # Software Foundation; either version 3, or (at your option) any later
12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING3. If not see
19 # <http://www.gnu.org/licenses/>. */
25 from collections
import defaultdict
27 default_changelog_locations
= {
31 'contrib/header-tools',
67 'libgcc/config/avr/libf7',
68 'libgcc/config/libbid',
140 'gcc/go/gofrontend/',
141 'gcc/testsuite/gdc.test/',
142 'gcc/testsuite/go.test/test/',
145 'libphobos/libdruntime/',
150 wildcard_prefixes
= {
152 'libstdc++-v3/doc/html/',
153 'libstdc++-v3/testsuite/'
# Patterns used to recognise the individual pieces of a commit message /
# ChangeLog entry.  All of them are compiled once at import time.

# Primary author line: ISO date, exactly two spaces, then "Name <email>".
author_line_regex = \
    re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# Additional author line: a tab, an optional run of spaces (captured as
# 'spaces'), then "Name <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "ChangeLog" header, optionally introduced by "for "/"For " and optionally
# followed by a colon; group 1 captures the directory prefix.
# NOTE(review): inside the character class "+-/" forms a range ('+' through
# '/'), so ',' and '.' are accepted as well - confirm this is intended.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" reference (4 to 7 digits) at the start of the text or
# after a non-word character.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PRnnnn]" / "(PRnnnn)" style reference (4 to 7 digits, optional
# whitespace after "PR").
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# PR line: tab, "PR ", optional "component/" and the bug number up to the
# end of the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# DR line: tab, "DR " and a number up to the end of the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Line starting with tab and asterisk; the spaces after the asterisk and the
# remaining content are captured separately.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that ends a file-name list: '[', '<', '(' or ':'.
end_of_location_regex = re.compile(r'[\[<(:]')
# Item of the form "(name):" (optionally preceded by "* file ") with nothing
# but whitespace after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# Line that starts a new item: tab followed by '*' or by "(name):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# Revert marker line; 'hash' is the lowercase hexadecimal commit id.
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# Cherry-pick marker; 'hash' is the run of word characters after it.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
179 CO_AUTHORED_BY_PREFIX
= 'co-authored-by: '
181 REVIEW_PREFIXES
= ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
182 'acked-by: ', 'tested-by: ', 'reported-by: ',
184 DATE_FORMAT
= '%Y-%m-%d'
def decode_path(path):
    """Return *path* with the quoting of a double-quoted path undone.

    A path wrapped in double quotes has the quotes removed and its octal
    escape sequences turned back into the UTF-8 characters they encode.
    Any other path is returned unchanged.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    if path.startswith('"') and path.endswith('"'):
        # 'unicode-escape' turns each escape into one code point below 256;
        # re-encoding as latin-1 recovers the raw bytes, which are UTF-8.
        return (path.strip('"').encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    # Unquoted paths need no decoding; return them as they are instead of
    # falling off the end of the function (which would yield None).
    return path
206 def __init__(self
, message
, line
=None, details
=None):
207 self
.message
= message
209 self
.details
= details
214 s
+= ': "%s"' % self
.line
218 class ChangeLogEntry
:
219 def __init__(self
, folder
, authors
, prs
):
221 # The 'list.copy()' function is not available before Python 3.3
222 self
.author_lines
= list(authors
)
223 self
.initial_prs
= list(prs
)
227 self
.file_patterns
= []
228 self
.parentheses_stack
= []
230 def parse_file_names(self
):
231 # Whether the content currently processed is between a star prefix and the
232 # end of the file list: a colon or an open paren.
235 for line
in self
.lines
:
236 # If this line matches the star prefix, start the location
237 # processing on the information that follows the star.
238 # Note that we need to skip macro names that can be in form of:
240 # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
241 # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
242 # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
244 m
= star_prefix_regex
.match(line
)
245 if m
and len(m
.group('spaces')) == 1:
247 line
= m
.group('content')
250 # Strip everything that is not a filename in "line":
251 # entities "(NAME)", cases "<PATTERN>", conditions
252 # "[COND]", entry text (the colon, if present, and
253 # anything that follows it).
254 m
= end_of_location_regex
.search(line
)
256 line
= line
[:m
.start()]
259 # At this point, all that's left is a list of filenames
260 # separated by commas and whitespaces.
261 for file in line
.split(','):
264 if file.endswith('*'):
265 self
.file_patterns
.append(file[:-1])
267 self
.files
.append(file)
271 for author
in self
.author_lines
:
278 return [author_line
[0] for author_line
in self
.author_lines
]
282 return not self
.lines
and self
.prs
== self
.initial_prs
284 def contains_author(self
, author
):
285 for author_lines
in self
.author_lines
:
286 if author_lines
[0] == author
:
292 def __init__(self
, hexsha
, date
, author
, lines
, modified_files
):
297 self
.modified_files
= modified_files
301 def __init__(self
, info
, commit_to_info_hook
=None, ref_name
=None):
302 self
.original_info
= info
306 self
.changelog_entries
= []
309 self
.top_level_authors
= []
311 self
.top_level_prs
= []
312 self
.subject_prs
= set()
313 self
.cherry_pick_commit
= None
314 self
.revert_commit
= None
315 self
.commit_to_info_hook
= commit_to_info_hook
316 self
.init_changelog_locations(ref_name
)
318 # Skip Update copyright years commits
319 if self
.info
.lines
and self
.info
.lines
[0] == 'Update copyright years.':
322 if self
.info
.lines
and len(self
.info
.lines
) > 1 and self
.info
.lines
[1]:
323 self
.errors
.append(Error('Expected empty second line in commit message', info
.lines
[0]))
325 # Identify first if the commit is a Revert commit
326 for line
in self
.info
.lines
:
327 m
= revert_regex
.fullmatch(line
)
329 self
.revert_commit
= m
.group('hash')
331 if self
.revert_commit
:
332 self
.info
= self
.commit_to_info_hook(self
.revert_commit
)
334 # The following happens for get_email.py:
338 self
.check_commit_email()
340 # Extract PR numbers from the subject line
341 # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
342 if self
.info
.lines
and not self
.revert_commit
:
343 self
.subject_prs
= {m
.group('pr') for m
in subject_pr2_regex
.finditer(info
.lines
[0])}
344 for m
in subject_pr_regex
.finditer(info
.lines
[0]):
345 if not m
.group('component') in bug_components
:
346 self
.errors
.append(Error('invalid PR component in subject', info
.lines
[0]))
347 self
.subject_prs
.add(m
.group('pr'))
349 # Allow complete deletion of ChangeLog files in a commit
350 project_files
= [f
for f
in self
.info
.modified_files
351 if (self
.is_changelog_filename(f
[0], allow_suffix
=True) and f
[1] != 'D')
352 or f
[0] in misc_files
]
353 ignored_files
= [f
for f
in self
.info
.modified_files
354 if self
.in_ignored_location(f
[0])]
355 if len(project_files
) == len(self
.info
.modified_files
):
356 # All modified files are only MISC files
359 err
= 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
360 'should be done separately from normal commits\n' \
361 '(note: ChangeLog entries will be automatically ' \
362 'added by a cron job)'
363 self
.errors
.append(Error(err
))
366 all_are_ignored
= (len(project_files
) + len(ignored_files
)
367 == len(self
.info
.modified_files
))
368 self
.parse_lines(all_are_ignored
)
370 self
.parse_changelog()
371 self
.parse_file_names()
372 self
.check_for_empty_description()
373 self
.check_for_broken_parentheses()
374 self
.deduce_changelog_locations()
375 self
.check_file_patterns()
376 self
.check_line_start()
378 self
.check_mentioned_files()
379 self
.check_for_correct_changelog()
381 self
.errors
.append(Error('PR %s in subject but not in changelog' %
382 ', '.join(self
.subject_prs
), self
.info
.lines
[0]))
386 return not self
.errors
390 return [x
[0] for x
in self
.info
.modified_files
if x
[1] == 'A']
393 def is_changelog_filename(cls
, path
, allow_suffix
=False):
394 basename
= os
.path
.basename(path
)
395 if basename
== 'ChangeLog':
397 elif allow_suffix
and basename
.startswith('ChangeLog'):
402 def find_changelog_location(self
, name
):
403 if name
.startswith('\t'):
405 if name
.endswith(':'):
407 if name
.endswith('/'):
409 return name
if name
in self
.changelog_locations
else None
412 def format_git_author(cls
, author
):
414 return author
.replace('<', ' <')
417 def parse_git_name_status(cls
, string
):
419 for entry
in string
.split('\n'):
420 parts
= entry
.split('\t')
422 if t
== 'A' or t
== 'D' or t
== 'M':
423 modified_files
.append((parts
[1], t
))
424 elif t
.startswith('R'):
425 modified_files
.append((parts
[1], 'D'))
426 modified_files
.append((parts
[2], 'A'))
427 return modified_files
429 def init_changelog_locations(self
, ref_name
):
430 self
.changelog_locations
= list(default_changelog_locations
)
432 version
= sys
.maxsize
433 if 'releases/gcc-' in ref_name
:
434 version
= int(ref_name
.split('-')[-1])
436 # HSA and BRIG were removed in GCC 12
437 self
.changelog_locations
.remove('gcc/brig')
438 self
.changelog_locations
.remove('libhsail-rt')
440 def parse_lines(self
, all_are_ignored
):
441 body
= self
.info
.lines
443 for i
, b
in enumerate(body
):
446 if (changelog_regex
.match(b
) or self
.find_changelog_location(b
)
447 or star_prefix_regex
.match(b
) or pr_regex
.match(b
)
448 or dr_regex
.match(b
) or author_line_regex
.match(b
)
449 or b
.lower().startswith(CO_AUTHORED_BY_PREFIX
)):
450 self
.changes
= body
[i
:]
452 if not all_are_ignored
:
453 self
.errors
.append(Error('cannot find a ChangeLog location in '
456 def parse_changelog(self
):
459 for line
in self
.changes
:
461 if last_entry
and will_deduce
:
464 if line
!= line
.rstrip():
465 self
.errors
.append(Error('trailing whitespace', line
))
466 if len(line
.replace('\t', ' ' * TAB_WIDTH
)) > LINE_LIMIT
:
467 # support long filenames
468 if not line
.startswith('\t* ') or not line
.endswith(':') or ' ' in line
[3:-1]:
469 self
.errors
.append(Error('line exceeds %d character limit'
471 m
= changelog_regex
.match(line
)
473 last_entry
= ChangeLogEntry(m
.group(1).rstrip('/'),
474 self
.top_level_authors
,
476 self
.changelog_entries
.append(last_entry
)
477 elif self
.find_changelog_location(line
):
478 last_entry
= ChangeLogEntry(self
.find_changelog_location(line
),
479 self
.top_level_authors
,
481 self
.changelog_entries
.append(last_entry
)
485 if author_line_regex
.match(line
):
486 m
= author_line_regex
.match(line
)
487 author_tuple
= (m
.group('name'), m
.group('datetime'))
488 elif additional_author_regex
.match(line
):
489 m
= additional_author_regex
.match(line
)
490 if len(m
.group('spaces')) != 4:
491 msg
= 'additional author must be indented with '\
492 'one tab and four spaces'
493 self
.errors
.append(Error(msg
, line
))
495 author_tuple
= (m
.group('name'), None)
496 elif pr_regex
.match(line
):
497 m
= pr_regex
.match(line
)
498 component
= m
.group('component')
501 self
.errors
.append(Error('missing PR component', line
))
503 elif not component
[:-1] in bug_components
:
504 self
.errors
.append(Error('invalid PR component', line
))
507 pr_line
= line
.lstrip()
508 if pr
in self
.subject_prs
:
509 self
.subject_prs
.remove(pr
)
510 elif dr_regex
.match(line
):
511 pr_line
= line
.lstrip()
513 lowered_line
= line
.lower()
514 if lowered_line
.startswith(CO_AUTHORED_BY_PREFIX
):
515 name
= line
[len(CO_AUTHORED_BY_PREFIX
):]
516 author
= self
.format_git_author(name
)
517 self
.co_authors
.append(author
)
519 elif lowered_line
.startswith(REVIEW_PREFIXES
):
522 m
= cherry_pick_regex
.search(line
)
524 commit
= m
.group('hash')
525 if self
.cherry_pick_commit
:
526 msg
= 'multiple cherry pick lines'
527 self
.errors
.append(Error(msg
, line
))
529 self
.cherry_pick_commit
= commit
532 # ChangeLog name will be deduced later
535 self
.top_level_authors
.append(author_tuple
)
538 # append to top_level_prs only when we haven't met
540 if (pr_line
not in self
.top_level_prs
541 and not self
.changelog_entries
):
542 self
.top_level_prs
.append(pr_line
)
545 last_entry
= ChangeLogEntry(None,
546 self
.top_level_authors
,
548 self
.changelog_entries
.append(last_entry
)
551 if not last_entry
.contains_author(author_tuple
[0]):
552 last_entry
.author_lines
.append(author_tuple
)
555 if not line
.startswith('\t'):
556 err
= Error('line should start with a tab', line
)
557 self
.errors
.append(err
)
559 last_entry
.prs
.append(pr_line
)
561 m
= star_prefix_regex
.match(line
)
563 if (len(m
.group('spaces')) != 1 and
564 not last_entry
.parentheses_stack
):
565 msg
= 'one space should follow asterisk'
566 self
.errors
.append(Error(msg
, line
))
568 content
= m
.group('content')
569 parts
= content
.split(':')
571 for needle
in ('()', '[]', '<>'):
572 if ' ' + needle
in parts
[0]:
573 msg
= f
'empty group "{needle}" found'
574 self
.errors
.append(Error(msg
, line
))
575 last_entry
.lines
.append(line
)
576 self
.process_parentheses(last_entry
, line
)
578 if last_entry
.is_empty
:
579 msg
= 'first line should start with a tab, ' \
580 'an asterisk and a space'
581 self
.errors
.append(Error(msg
, line
))
583 last_entry
.lines
.append(line
)
584 self
.process_parentheses(last_entry
, line
)
586 def process_parentheses(self
, last_entry
, line
):
589 last_entry
.parentheses_stack
.append(line
)
591 if not last_entry
.parentheses_stack
:
592 msg
= 'bad wrapping of parenthesis'
593 self
.errors
.append(Error(msg
, line
))
595 del last_entry
.parentheses_stack
[-1]
def parse_file_names(self):
    """Have every entry in self.changelog_entries parse its file names."""
    for changelog_entry in self.changelog_entries:
        changelog_entry.parse_file_names()
def check_file_patterns(self):
    """Report wildcard patterns that are outside the allowed prefixes.

    Every pattern of every entry in self.changelog_entries is joined with
    the entry's folder.  If the resulting path starts with none of the
    wildcard_prefixes, an 'unsupported wildcard prefix' Error carrying that
    path is appended to self.errors.
    """
    for entry in self.changelog_entries:
        if entry.folder is None:
            # No ChangeLog folder is known for this entry, so no full path
            # can be built; os.path.join(None, ...) would raise TypeError.
            # NOTE(review): presumably the failed deduction has already
            # been reported as an error - confirm against the caller.
            continue
        for pattern in entry.file_patterns:
            name = os.path.join(entry.folder, pattern)
            # any() is essential here: a list of booleans is truthy as soon
            # as it is non-empty, so negating the list itself never fires.
            if not any(name.startswith(prefix)
                       for prefix in wildcard_prefixes):
                msg = 'unsupported wildcard prefix'
                self.errors.append(Error(msg, name))
def check_for_empty_description(self):
    """Flag items that name a function but carry no description.

    A line matching item_empty_regex is reported with a
    'missing description of a change' Error when it is the last line of
    its entry, when the next line is blank, or when the next line already
    starts another item (item_parenthesis_regex).
    """
    for changelog_entry in self.changelog_entries:
        entry_lines = changelog_entry.lines
        last_index = len(entry_lines) - 1
        for index, current in enumerate(entry_lines):
            if not item_empty_regex.match(current):
                continue
            if index != last_index:
                following = entry_lines[index + 1]
                # A non-blank continuation line that does not open a new
                # item is the description we were looking for.
                if (following.strip()
                        and not item_parenthesis_regex.match(following)):
                    continue
            self.errors.append(Error('missing description of a change',
                                     current))
def check_for_broken_parentheses(self):
    """Report entries whose parentheses stack is not empty.

    For each such entry a 'bad parentheses wrapping' Error is appended to
    self.errors, carrying the last element left on the stack.
    """
    for changelog_entry in self.changelog_entries:
        leftover = changelog_entry.parentheses_stack
        if not leftover:
            continue
        self.errors.append(Error('bad parentheses wrapping', leftover[-1]))
def check_line_start(self):
    """Report entry lines where a space directly follows the leading tab.

    Each offending line yields an 'extra space after tab' Error in
    self.errors.
    """
    for changelog_entry in self.changelog_entries:
        for text in changelog_entry.lines:
            if not text.startswith('\t '):
                continue
            self.errors.append(Error('extra space after tab', text))
632 def get_file_changelog_location(self
, changelog_file
):
633 for file in self
.info
.modified_files
:
634 if file[0] == changelog_file
:
635 # root ChangeLog file
637 index
= file[0].find('/' + changelog_file
)
639 return file[0][:index
]
642 def deduce_changelog_locations(self
):
643 for entry
in self
.changelog_entries
:
644 if entry
.folder
is None:
646 for file in entry
.files
:
647 location
= self
.get_file_changelog_location(file)
649 or (location
and location
in self
.changelog_locations
)):
650 if changelog
and changelog
!= location
:
651 msg
= 'could not deduce ChangeLog file, ' \
652 'not unique location'
653 self
.errors
.append(Error(msg
))
656 if changelog
is not None:
657 entry
.folder
= changelog
659 msg
= 'could not deduce ChangeLog file'
660 self
.errors
.append(Error(msg
))
663 def in_ignored_location(cls
, path
):
664 for ignored
in ignored_prefixes
:
665 if path
.startswith(ignored
):
669 def get_changelog_by_path(self
, path
):
670 components
= path
.split('/')
672 if '/'.join(components
) in self
.changelog_locations
:
674 components
= components
[:-1]
675 return '/'.join(components
)
677 def check_mentioned_files(self
):
678 folder_count
= len([x
.folder
for x
in self
.changelog_entries
])
679 assert folder_count
== len(self
.changelog_entries
)
681 mentioned_files
= set()
682 mentioned_patterns
= []
683 used_patterns
= set()
684 for entry
in self
.changelog_entries
:
685 if not entry
.files
and not entry
.file_patterns
:
686 msg
= 'no files mentioned for ChangeLog in directory'
687 self
.errors
.append(Error(msg
, entry
.folder
))
688 assert not entry
.folder
.endswith('/')
689 for file in entry
.files
:
690 if not self
.is_changelog_filename(file):
691 item
= os
.path
.join(entry
.folder
, file)
692 if item
in mentioned_files
:
693 msg
= 'same file specified multiple times'
694 self
.errors
.append(Error(msg
, file))
696 mentioned_files
.add(item
)
697 for pattern
in entry
.file_patterns
:
698 mentioned_patterns
.append(os
.path
.join(entry
.folder
, pattern
))
700 cand
= [x
[0] for x
in self
.info
.modified_files
701 if not self
.is_changelog_filename(x
[0])]
702 changed_files
= set(cand
)
703 for file in sorted(mentioned_files
- changed_files
):
704 msg
= 'unchanged file mentioned in a ChangeLog'
705 candidates
= difflib
.get_close_matches(file, changed_files
, 1)
708 msg
+= f
' (did you mean "{candidates[0]}"?)'
709 details
= '\n'.join(difflib
.Differ().compare([file], [candidates
[0]])).rstrip()
710 self
.errors
.append(Error(msg
, file, details
))
711 auto_add_warnings
= defaultdict(list)
712 for file in sorted(changed_files
- mentioned_files
):
713 if not self
.in_ignored_location(file):
714 if file in self
.new_files
:
715 changelog_location
= self
.get_changelog_by_path(file)
716 # Python2: we cannot use next(filter(...))
717 entries
= filter(lambda x
: x
.folder
== changelog_location
,
718 self
.changelog_entries
)
719 entries
= list(entries
)
720 entry
= entries
[0] if entries
else None
722 prs
= self
.top_level_prs
724 # if all ChangeLog entries have identical PRs
726 if self
.changelog_entries
:
727 prs
= self
.changelog_entries
[0].prs
728 for entry
in self
.changelog_entries
:
732 entry
= ChangeLogEntry(changelog_location
,
733 self
.top_level_authors
,
735 self
.changelog_entries
.append(entry
)
736 # strip prefix of the file
737 assert file.startswith(entry
.folder
)
738 # do not allow auto-addition of New files
739 # for the top-level folder
741 file = file[len(entry
.folder
):].lstrip('/')
742 entry
.lines
.append('\t* %s: New file.' % file)
743 entry
.files
.append(file)
744 auto_add_warnings
[entry
.folder
].append(file)
746 msg
= 'new file in the top-level folder not mentioned in a ChangeLog'
747 self
.errors
.append(Error(msg
, file))
749 used_pattern
= [p
for p
in mentioned_patterns
750 if file.startswith(p
)]
751 used_pattern
= used_pattern
[0] if used_pattern
else None
753 used_patterns
.add(used_pattern
)
755 msg
= 'changed file not mentioned in a ChangeLog'
756 self
.errors
.append(Error(msg
, file))
758 for pattern
in mentioned_patterns
:
759 if pattern
not in used_patterns
:
760 error
= "pattern doesn't match any changed files"
761 self
.errors
.append(Error(error
, pattern
))
762 for entry
, val
in auto_add_warnings
.items():
764 self
.warnings
.append(f
"Auto-added new file '{entry}/{val[0]}'")
766 self
.warnings
.append(f
"Auto-added {len(val)} new files in '{entry}'")
def check_for_correct_changelog(self):
    """Verify that every mentioned file is listed under the right ChangeLog.

    For each file of each entry the full path (entry folder joined with
    the file name) is passed to self.get_changelog_by_path.  When the
    returned location differs from the entry's folder, a
    'wrong ChangeLog location' Error naming both is added to self.errors.
    """
    template = 'wrong ChangeLog location "%s", should be "%s"'
    for changelog_entry in self.changelog_entries:
        folder = changelog_entry.folder
        for filename in changelog_entry.files:
            expected = self.get_changelog_by_path(
                os.path.join(folder, filename))
            if expected == folder:
                continue
            self.errors.append(Error(template % (folder, expected), filename))
779 def format_authors_in_changelog(cls
, authors
, timestamp
, prefix
=''):
781 for i
, author
in enumerate(authors
):
783 output
+= '%s%s %s\n' % (prefix
, timestamp
, author
)
785 output
+= '%s\t %s\n' % (prefix
, author
)
789 def to_changelog_entries(self
, use_commit_ts
=False):
790 current_timestamp
= self
.info
.date
.strftime(DATE_FORMAT
)
791 for entry
in self
.changelog_entries
:
793 timestamp
= entry
.datetime
794 if self
.revert_commit
:
795 timestamp
= current_timestamp
796 orig_date
= self
.original_info
.date
797 current_timestamp
= orig_date
.strftime(DATE_FORMAT
)
798 elif self
.cherry_pick_commit
:
799 info
= self
.commit_to_info_hook(self
.cherry_pick_commit
)
800 # it can happen that it is a cherry-pick for a different
803 timestamp
= info
.date
.strftime(DATE_FORMAT
)
805 timestamp
= current_timestamp
806 elif not timestamp
or use_commit_ts
:
807 timestamp
= current_timestamp
808 authors
= entry
.authors
if entry
.authors
else [self
.info
.author
]
809 # add Co-Authored-By authors to all ChangeLog entries
810 for author
in self
.co_authors
:
811 if author
not in authors
:
812 authors
.append(author
)
814 if self
.cherry_pick_commit
or self
.revert_commit
:
815 original_author
= self
.original_info
.author
816 output
+= self
.format_authors_in_changelog([original_author
],
818 if self
.revert_commit
:
819 output
+= '\tRevert:\n'
821 output
+= '\tBackported from master:\n'
822 output
+= self
.format_authors_in_changelog(authors
,
825 output
+= self
.format_authors_in_changelog(authors
, timestamp
)
827 output
+= '\t%s\n' % pr
828 for line
in entry
.lines
:
829 output
+= line
+ '\n'
830 yield (entry
.folder
, output
.rstrip())
832 def print_output(self
):
833 for entry
, output
in self
.to_changelog_entries():
834 print('------ %s/ChangeLog ------ ' % entry
)
837 def print_errors(self
):
839 for error
in self
.errors
:
842 def print_warnings(self
):
845 for warning
in self
.warnings
:
def check_commit_email(self):
    """Record an error when the commit author's email is not pure ASCII.

    The email is the last space-separated token of self.info.author with
    surrounding angle brackets removed.
    """
    # Parse 'Martin Liska <mliska@suse.cz>'
    email = self.info.author.rsplit(' ', 1)[-1].strip('<>')

    # Verify that all characters are ASCII: only then does the encoded
    # form have exactly as many bytes as the string has characters.
    # TODO: Python 3.7 provides a nicer function: isascii
    if len(email.encode()) == len(email):
        return
    self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))