]> git.ipfire.org Git - people/ms/suricata.git/blob - scripts/git-clang-format-custom
detect: allows <> syntax for uint ranges
[people/ms/suricata.git] / scripts / git-clang-format-custom
1 #!/usr/bin/env python
2 # Copy of https://github.com/llvm-mirror/clang/blob/master/tools/clang-format/git-clang-format
3 # Adds a --diffstat option to show the files needing formatting.
4 # This change will be upstreamed, but the current git-clang-format does not
5 # have it yet. We use it in the internal scripts/clang-format.sh
6 #
7 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
8 #
9 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10 # See https://llvm.org/LICENSE.txt for license information.
11 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12 #
13 #===------------------------------------------------------------------------===#
14
15 r"""
16 clang-format git integration
17 ============================
18
19 This file provides a clang-format integration for git. Put it somewhere in your
20 path and ensure that it is executable. Then, "git clang-format" will invoke
21 clang-format on the changes in current files or a specific commit.
22
23 For further details, run:
24 git clang-format -h
25
26 Requires Python 2.7 or Python 3
27 """
28
29 from __future__ import absolute_import, division, print_function
30 import argparse
31 import collections
32 import contextlib
33 import errno
34 import os
35 import re
36 import subprocess
37 import sys
38
# One-line synopsis shown by argparse in place of its auto-generated usage.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description printed verbatim (RawDescriptionHelpFormatter) by --help.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
clangFormat.binary
clangFormat.commit
clangFormat.extensions
clangFormat.style
'''

# Name of the temporary index file in which the output of clang-format is
# saved. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at 1-based line `start`.
Range = collections.namedtuple('Range', ['start', 'count'])
62
63
def main():
    """Run the ``git clang-format`` command line tool.

    Parses ``sys.argv``, determines which lines changed relative to the
    requested commit(s), runs clang-format on those lines only, and then
    either prints a diff (--diff), prints a diffstat (--diffstat) or applies
    the result to the working directory.  Defaults for --binary, --commit,
    --extensions and --style are read from git-config.

    Terminates the process with status 2 (through `die`, `run` or
    `sys.exit`) on usage errors or when a git command fails.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--',
    # while nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--diffstat', action='store_true',
                   help='print diffstat instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so the rest of the function checks a single level.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first: it used to live in the `else` of the
    # `len(commits) > 1` test, where it could never fire.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) > 1 and not opts.diff:
        die('--diff is required when two commits are given')
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        # Snapshot the file names before filtering so we can report the ones
        # that get dropped.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    elif opts.diffstat:
        print_diffstat(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)
190
191
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    The configuration is read with ``git config --list --null``.  All options
    are assumed to be strings unless named in `non_string_options`, a
    dictionary mapping option name (in lower case) to either "--bool" or
    "--int"; such options are re-read with ``git config <type> <name>``.

    A key that git lists without any value (no newline separator in the
    --null output) is stored with the value 'true' instead of raising.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            name, value = entry.split('\n', 1)
        else:
            # A setting written without '=' is emitted as a bare key; git
            # treats that shorthand as boolean true.
            name, value = entry, 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
208
209
def interpret_args(args, dash_dash, default_commit):
    """Split the positional arguments into ``(commits, files)``.

    `args` holds everything to the left of "--"; `dash_dash` holds "--" and
    everything after it (empty when "--" was not given).

    With "--" present, all of `args` must name commits (or `default_commit`
    alone is used when `args` is empty) and the files are whatever follows
    "--".  Without it, leading arguments are consumed from `args` as commits
    for as long as `disambiguate_revision` accepts them; the remainder are
    files.  `default_commit` is substituted when no commit was found.
    """
    if dash_dash:
        commits = args if len(args) != 0 else [default_commit]
        for candidate in commits:
            kind = get_object_type(candidate)
            if kind in ('commit', 'tag'):
                continue
            if kind is None:
                die("'%s' is not a commit" % candidate)
            else:
                die("'%s' is a %s, but a commit was expected" %
                    (candidate, kind))
        return commits, dash_dash[1:]

    if not args:
        return [default_commit], []

    commits = []
    # Peel revisions off the front; the first non-revision starts the files.
    while args and disambiguate_revision(args[0]):
        commits.append(args.pop(0))
    if not commits:
        commits = [default_commit]
    return commits, args
246
247
def disambiguate_revision(value):
    """Classify `value`: True for a commit or tag, False for a non-object.

    Exits the process if `git rev-parse` rejects `value`, or if `value` names
    a git object that is neither a commit nor a tag.
    """
    # Called only for its failure mode: when git cannot interpret `value`
    # at all, run() reports the error and exits.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
260
261
def get_object_type(value):
    """Return the output of ``git cat-file -t <value>`` as a string.

    Returns None when the command exits with a non-zero status, i.e. when
    git does not recognise `value` as an object.
    """
    query = subprocess.Popen(['git', 'cat-file', '-t', value],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    output = query.communicate()[0]
    if query.returncode == 0:
        return convert_string(output.strip())
    return None
271
272
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Exits with status 2 if the diff process reports failure.
    """
    differ = compute_diff(commits, files)
    line_ranges = extract_lines(differ.stdout)
    differ.stdout.close()
    if differ.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return line_ranges
283
284
def compute_diff(commits, files):
    """Start git producing the diff for `commits` and return the process.

    The returned subprocess's `stdout` yields a patch with zero context
    lines.  ``git diff-index`` is used for a single commit and
    ``git diff-tree`` when more than one commit is given.  `files` is passed
    after "--" to restrict the diff (no restriction if empty).
    """
    tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    argv = ['git', tool, '-p', '-U0']
    argv.extend(commits)
    argv.append('--')
    argv.extend(files)
    differ = subprocess.Popen(argv, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    # git gets no input from us; close the pipe right away.
    differ.stdin.close()
    return differ
300
301
def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of lines (bytes or str) that must have been
    produced with ``-U0``, meaning unidiff format with zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s
    (start_line, line_count).  Hunks that add no lines are omitted.
    """
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            # Besides the line terminator, strip the TAB that git appends
            # after a file name containing spaces; otherwise the key would
            # not match the file on disk.
            filename = match.group(1).rstrip('\r\n\t')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches
326
327
def filter_by_extension(dictionary, allowed_extensions):
    """Remove, in place, every key of `dictionary` with a disallowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  The extension is whatever follows the last '.' in
    the key, compared case-insensitively.  Keys containing no '.' survive
    only if the empty string is among the allowed extensions.
    """
    permitted = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted.
    for path in list(dictionary):
        pieces = path.rsplit('.', 1)
        if len(pieces) == 1:
            keep = '' in permitted
        else:
            keep = pieces[1].lower() in permitted
        if not keep:
            del dictionary[path]
340
341
def cd_to_toplevel():
    """Make the repository's top-level directory the current directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
346
347
def create_tree_from_workdir(filenames):
    """Build a git tree from the working-directory copies of `filenames`.

    Returns the object ID (SHA-1) of the created tree.
    """
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
353
354
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results in a tree.

    `changed_lines` maps file name to the line ranges to format.  When
    `revision` is given, file modes are taken from that revision through
    ``git ls-tree``; otherwise they come from the file on disk.

    Returns the object ID (SHA-1) of the created tree.
    """
    def mode_of(path):
        # Octal mode string in the spelling `git update-index` is fed.
        if revision:
            ls_tree = subprocess.Popen(
                ['git', 'ls-tree',
                 '%s:%s' % (revision, os.path.dirname(path)),
                 os.path.basename(path)],
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            listing = ls_tree.communicate()[0]
            text = oct(int(listing.split()[0], 8))
        else:
            text = oct(os.stat(path).st_mode)
        # Adjust python3 octal format so that it matches what git expects
        if text.startswith('0o'):
            text = '0' + text[2:]
        return text

    def index_entries():
        try:
            pairs = changed_lines.iteritems()  # Python 2
        except AttributeError:
            pairs = changed_lines.items()  # Python 3
        for path, ranges in pairs:
            mode = mode_of(path)
            blob_id = clang_format_to_blob(path, ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, path)

    return create_tree(index_entries(), '--index-info')
386
387
def create_tree(input_lines, mode):
    """Write a tree object built from `input_lines` and return its ID.

    With mode '--stdin', `input_lines` must be an iterable of file names.
    With mode '--index-info' it must yield values suitable for
    "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

    The work happens in a temporary index file, which is removed afterwards.
    """
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        # '-z' makes update-index expect NUL-terminated records.
        for record in input_lines:
            updater.stdin.write(to_bytes('%s\0' % record))
        updater.stdin.close()
        status = updater.wait()
        if status != 0:
            die('`%s` failed' % ' '.join(cmd))
        # Must run while GIT_INDEX_FILE still points at the temporary index.
        return run('git', 'write-tree')
406
407
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Format one file with clang-format and store the output as a git blob.

    The input is the file as of `revision` when that is given (piped from
    ``git cat-file blob``), otherwise the file in the working directory.
    Only the lines covered by `line_ranges` are passed to clang-format via
    ``-lines``.

    Returns the object ID (SHA-1) of the created blob.  Exits through `die`
    if the clang-format executable is missing or any process in the pipeline
    fails.
    """
    formatter_cmd = [binary]
    if style:
        formatter_cmd.append('-style=' + style)
    for first, count in line_ranges:
        formatter_cmd.append('-lines=%s:%s' % (first, first + count - 1))
    if revision:
        # Input arrives on stdin, so clang-format is told the name only.
        formatter_cmd.append('-assume-filename=' + filename)
        show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
        show_proc = subprocess.Popen(show_cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
        show_proc.stdin.close()
        formatter_input = show_proc.stdout
    else:
        formatter_cmd.append(filename)
        show_proc = None
        formatter_input = subprocess.PIPE
    try:
        formatter = subprocess.Popen(formatter_cmd, stdin=formatter_input,
                                     stdout=subprocess.PIPE)
        if formatter_input == subprocess.PIPE:
            formatter_input = formatter.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of the formatter's input end in either configuration.
    formatter_input.close()
    hash_cmd = ['git', 'hash-object', '-w', '--path=' + filename, '--stdin']
    hasher = subprocess.Popen(hash_cmd, stdin=formatter.stdout,
                              stdout=subprocess.PIPE)
    formatter.stdout.close()
    blob_id = hasher.communicate()[0]
    if hasher.returncode != 0:
        die('`%s` failed' % ' '.join(hash_cmd))
    if formatter.wait() != 0:
        die('`%s` failed' % ' '.join(formatter_cmd))
    if show_proc and show_proc.wait() != 0:
        die('`%s` failed' % ' '.join(show_cmd))
    return convert_string(blob_id).rstrip('\r\n')
456
457
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index.

    The index is created from `tree` (see create_temporary_index).  On exit
    the previous GIT_INDEX_FILE value is restored, or the variable is removed
    if it was not set, and the temporary file is deleted.
    """
    index_path = create_temporary_index(tree)
    saved = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if saved is not None:
            os.environ['GIT_INDEX_FILE'] = saved
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
473
474
def create_temporary_index(tree=None):
    """Create the temporary index file and return its path.

    The file lives in the git directory under `temp_index_basename`.  It is
    populated from `tree` when that is not None; otherwise an empty index is
    created.
    """
    git_dir = run('git', 'rev-parse', '--git-dir')
    index_path = os.path.join(git_dir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + index_path, source)
    return index_path
486
487
def print_diff(old_tree, new_tree):
    """Show the diff from `old_tree` to `new_tree` on stdout."""
    # The porcelain 'diff' is chosen over the plumbing 'diff-tree' because a
    # person reads this output, and only the porcelain provides color and
    # pagination.
    #
    # `new_tree` holds just the files that were formatted, so without the
    # modified-only filter every other file would be reported as deleted.
    cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(cmd)
499
def print_diffstat(old_tree, new_tree):
    """Show the diffstat from `old_tree` to `new_tree` on stdout."""
    # The porcelain 'diff' is chosen over the plumbing 'diff-tree' because a
    # person reads this output, and only the porcelain provides color and
    # pagination.
    #
    # `new_tree` holds just the files that were formatted, so without the
    # modified-only filter every other file would be reported as deleted.
    cmd = ['git', 'diff', '--diff-filter=M', '--stat', old_tree, new_tree,
           '--']
    subprocess.check_call(cmd)
511
512
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails out with exit status 2 if any file that would be modified has
    unstaged changes, unless `force` is set.  If `patch_mode`, runs
    `git checkout --patch` to select hunks interactively; otherwise the files
    are checked out from a temporary index built from `new_tree`.

    Returns the list of file names that differ between the two trees.
    """
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps file names that begin with '-' from being read as
        # options.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
543
544
def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin   -- data written to the command's standard input (default '').
                 Text is encoded as UTF-8 first; bytes are passed unchanged.
      verbose -- when true (default), announce on stderr which command wrote
                 to stderr or failed.
      strip   -- when true (default), strip trailing CR/LF from the output.

    Anything the command writes to stderr is forwarded to our stderr.  If the
    command exits with a non-zero status the process exits with status 2.

    Raises TypeError for any other keyword argument.
    """
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so Python 3 rejects non-empty text input; encode
    # it before handing it over.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
571
572
def die(message):
    """Print `message` to stderr, prefixed with "error:", and exit with 2."""
    print('error: %s' % (message,), file=sys.stderr)
    raise SystemExit(2)
576
577
def to_bytes(str_input):
    """Return `str_input` as binary data.

    A value that is already `bytes` is returned unchanged; anything else is
    encoded as UTF-8.
    """
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
583
584
def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged.  Any other value is UTF-8 *encoded*, so
    despite the parameter name it must offer an `encode` method
    (presumably a Python 2 `unicode` object -- verify against callers).
    """
    if not isinstance(bytes_input, str):
        return bytes_input.encode('utf-8')
    return bytes_input
589
590
def convert_string(bytes_input):
    """Convert process output to a native string.

    The input is decoded as UTF-8 and passed through to_string().  If it has
    no `decode` method, or is not valid UTF-8, `str(bytes_input)` is returned
    instead.
    """
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
598
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()