2 # Copy of https://github.com/llvm-mirror/clang/blob/master/tools/clang-format/git-clang-format
3 # Adds a --diffstat option to show the files needing formatting.
4 # This change will be upstreamed, but the current git-clang-format does not
5 # have it yet. We use it in the internal scripts/clang-format.sh
7 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
9 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10 # See https://llvm.org/LICENSE.txt for license information.
11 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
13 #===------------------------------------------------------------------------===#
r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""
from __future__ import absolute_import, division, print_function

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys
# One-line synopsis shown by argparse.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown below the synopsis in the --help output.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'

# A run of `count` consecutive lines beginning at 1-based line `start`.
Range = collections.namedtuple('Range', 'start, count')
def main():
  """Entry point: format the lines that changed relative to a commit.

  Defaults for --binary, --commit, --extensions and --style are read from the
  git configuration.  The changed lines are computed with git, filtered by
  file extension, formatted into a temporary git tree and then, depending on
  the options, shown as a diff (--diff), shown as a diffstat (--diffstat) or
  applied to the working directory.
  """
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
      usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
      description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--diffstat', action='store_true',
                 help='print diffstat instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level remains.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first: nested under "more than one commit" as an
  # else-branch, the "more than two" test could never be reached.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')

  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print(' %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print(' %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return

  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)

  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  elif opts.diffstat:
    print_diffstat(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print(' %s' % filename)
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless in `non_string_options`, which
  is a dictionary mapping option name (in lower case) to either "--bool" or
  "--int".  Options listed there are re-read through `git config <flag> <name>`
  so that git performs the type conversion.
  """
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    if '\n' in entry:
      name, value = entry.split('\n', 1)
    else:
      # With --null the key and value are separated by a newline; a key that
      # was written without '=' has no value part at all.  Git treats such a
      # key as boolean true, so mirror that instead of failing to unpack.
      name, value = entry, 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
def interpret_args(args, dash_dash, default_commit):
  """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

  It is assumed that "--" and everything that follows has been removed from
  args and placed in `dash_dash`.

  If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
  left (if present) are taken as commits. Otherwise, the arguments are checked
  from left to right if they are commits or files. If commits are not given,
  a list with `default_commit` is used.

  `args` itself is left unmodified."""
  if dash_dash:
    commits = list(args) if args else [default_commit]
    for commit in commits:
      object_type = get_object_type(commit)
      if object_type not in ('commit', 'tag'):
        if object_type is None:
          die("'%s' is not a commit" % commit)
        else:
          die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    files = dash_dash[1:]
  elif args:
    # Work on a copy so that the caller's list is not consumed.
    remaining = list(args)
    commits = []
    # Leading arguments are commits until the first one that names a file.
    while remaining and disambiguate_revision(remaining[0]):
      commits.append(remaining.pop(0))
    if not commits:
      commits = [default_commit]
    files = remaining
  else:
    commits = [default_commit]
    files = []
  return commits, files
def disambiguate_revision(value):
  """Returns True if `value` is a revision, False if it is a file, or dies."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  object_type = get_object_type(value)
  if object_type is None:
    # Not a git object at all, so it has to be a path.
    return False
  if object_type in ('commit', 'tag'):
    return True
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, object_type))
def get_object_type(value):
  """Returns a string description of an object's type, or None if it is not
  a valid git object."""
  cmd = ['git', 'cat-file', '-t', value]
  # stderr is captured only to keep git's complaint about unknown objects off
  # the terminal; its content is not used.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, _ = p.communicate()
  if p.returncode != 0:
    return None
  return convert_string(stdout.strip())
def compute_diff_and_extract_lines(commits, files):
  """Calls compute_diff() followed by extract_lines().

  Returns the dictionary produced by extract_lines().  Exits with status 2 if
  the git diff process reports failure."""
  diff_process = compute_diff(commits, files)
  changed_lines = extract_lines(diff_process.stdout)
  diff_process.stdout.close()
  diff_process.wait()
  if diff_process.returncode != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return changed_lines
def compute_diff(commits, files):
  """Return a subprocess object producing the diff from `commits`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and the first commit if a single
  one was specified, or the difference between both specified commits, filtered
  on `files` (if non-empty). Zero context lines are used in the patch."""
  # diff-index compares a commit with the working directory; comparing two
  # commits needs diff-tree.
  git_tool = 'diff-index'
  if len(commits) > 1:
    git_tool = 'diff-tree'
  cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  p.stdin.close()
  return p
def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  The input must have been produced with ``-U0``, meaning unidiff format with
  zero lines of context. The return value is a dict mapping filename to a
  list of line `Range`s.  Hunks that add no lines (pure deletions) are
  skipped."""
  matches = {}
  for line in patch_file:
    line = convert_string(line)
    # "+++ b/<path>" names the file that the following hunks belong to.
    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if match:
      filename = match.group(1).rstrip('\r\n')
    # "@@ -a,b +c,d @@": c is the first new line, d the count (1 if omitted).
    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if match:
      start_line = int(match.group(1))
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      if line_count > 0:
        matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  The empty string stands for "no extension".  The
  extension is taken from the last path component only, so a dot in a
  directory name is never mistaken for one.  `dictionary` is modified in
  place."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys because entries are deleted on the go.
  for filename in list(dictionary):
    basename = filename.rsplit('/', 1)[-1]
    _, dot, extension = basename.rpartition('.')
    if not dot:
      extension = ''
    if extension.lower() not in allowed_extensions:
      del dictionary[filename]
def cd_to_toplevel():
  """Change to the top level of the git repository."""
  toplevel = run('git', 'rev-parse', '--show-toplevel')
  os.chdir(toplevel)
def create_tree_from_workdir(filenames):
  """Create a new git tree with the given files from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  return create_tree(filenames, '--stdin')
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the line ranges to format.  If
  `revision` is given, file modes and contents are taken from that revision,
  otherwise from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def index_info_generator():
    # dict.items() exists on both Python 2 and 3.
    for filename, line_ranges in changed_lines.items():
      if revision:
        git_metadata_cmd = ['git', 'ls-tree',
                            '%s:%s' % (revision, os.path.dirname(filename)),
                            os.path.basename(filename)]
        git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
        stdout = git_metadata.communicate()[0]
        # The first field of the ls-tree output is the octal file mode.
        mode = oct(int(stdout.split()[0], 8))
      else:
        mode = oct(os.stat(filename).st_mode)
      # Adjust python3 octal format so that it matches what git expects
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')
def create_tree(input_lines, mode):
  """Create a tree object from the given input.

  If mode is '--stdin', it must be a list of filenames. If mode is
  '--index-info' it must be a list of values suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other mode
  is invalid.

  Returns the object ID of the written tree."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  # Populate a scratch index so that the user's real index is left untouched.
  with temporary_index_file():
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for line in input_lines:
      # -z: entries are NUL-terminated.
      p.stdin.write(to_bytes('%s\0' % line))
    p.stdin.close()
    if p.wait() != 0:
      die('`%s` failed' % ' '.join(cmd))
    tree_id = run('git', 'write-tree')
    return tree_id
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs restricting
  the formatting; `binary` is the clang-format executable and `style`, if set,
  is forwarded as -style=.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # Feed the blob from `revision` through stdin; -assume-filename tells
    # clang-format which name to assume for that input.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input.
  clang_format_stdin.close()
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
  the file afterward.

  `tree` is handed to create_temporary_index().  On exit the previous value of
  GIT_INDEX_FILE is restored, or the variable is removed if it was unset."""
  index_path = create_temporary_index(tree)
  old_index_path = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if old_index_path is None:
      del os.environ['GIT_INDEX_FILE']
    else:
      os.environ['GIT_INDEX_FILE'] = old_index_path
    os.remove(index_path)
def create_temporary_index(tree=None):
  """Create a temporary index file and return the created file's path.

  If `tree` is not None, use that as the tree to read in. Otherwise, an
  empty index is created."""
  gitdir = run('git', 'rev-parse', '--git-dir')
  path = os.path.join(gitdir, temp_index_basename)
  if tree is None:
    tree = '--empty'
  run('git', 'read-tree', '--index-output='+path, tree)
  return path
def print_diff(old_tree, new_tree):
  """Print the diff between the two trees to stdout."""
  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
  # is expected to be viewed by the user, and only the former does nice things
  # like color and pagination.
  #
  # We also only print modified files since `new_tree` only contains the files
  # that were modified, so unmodified files would show as deleted without the
  # filter.
  subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
                         '--'])
def print_diffstat(old_tree, new_tree):
  """Print the diffstat between the two trees to stdout."""
  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
  # is expected to be viewed by the user, and only the former does nice things
  # like color and pagination.
  #
  # We also only print modified files since `new_tree` only contains the files
  # that were modified, so unmodified files would show as deleted without the
  # filter.
  subprocess.check_call(['git', 'diff', '--diff-filter=M', '--stat', old_tree,
                         new_tree, '--'])
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails if there are local changes in those files and not `force`. If
  `patch_mode`, runs `git checkout --patch` to select hunks interactively.

  Returns the list of files that differ between the two trees."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
            'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree". Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree". This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
def run(*args, **kwargs):
  """Run the command given by `args` and return its standard output.

  Keyword arguments:
    stdin: data passed to the command's standard input (default '').
    verbose: if true (the default), report on stderr which command wrote to
      stderr or returned a non-zero status.
    strip: if true (the default), strip trailing CR/LF from the output.

  Anything the command writes to stderr is echoed to stderr.  If the command
  returns a non-zero status, the script exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
def die(message):
  """Print `message` to stderr, prefixed with "error:", and exit with status 2."""
  print('error:', message, file=sys.stderr)
  sys.exit(2)
def to_bytes(str_input):
  """Return `str_input` as bytes; text is encoded as UTF-8, bytes pass through."""
  # Encode to UTF-8 to get binary data.
  if isinstance(str_input, bytes):
    return str_input
  return str_input.encode('utf-8')
def to_string(bytes_input):
  """Return `bytes_input` as the interpreter's native `str` type.

  A native `str` is returned unchanged.  On Python 3, `bytes` are decoded as
  UTF-8; on Python 2, `unicode` text is encoded as UTF-8."""
  if isinstance(bytes_input, str):
    return bytes_input
  if isinstance(bytes_input, bytes):
    # Only reachable on Python 3, where bytes is not str.  Such input has no
    # .encode(), so it must be decoded instead.
    return bytes_input.decode('utf-8')
  return bytes_input.encode('utf-8')
def convert_string(bytes_input):
  """Return `bytes_input` as a native string.

  The input is decoded as UTF-8 where possible.  If it cannot be decoded, or
  has no `decode` method, `str(bytes_input)` is returned instead."""
  try:
    return to_string(bytes_input.decode('utf-8'))
  except (AttributeError, UnicodeError):
    # AttributeError: 'str' object has no attribute 'decode'.
    # UnicodeError: the data is not valid UTF-8.
    return str(bytes_input)
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
  main()