# Tool to bundle multiple C/C++ source files, inlining any includes.
#
+# Note: there are two types of exclusion options: the '-x' flag, which besides
+# excluding a file also adds an #error directive in place of the #include, and
+# the '-k' flag, which keeps the #include and doesn't inline the file. The
+# intended use cases are: '-x' for files that would normally be #if'd out, so
+# features that 100% won't be used in the amalgamated file, for which every
+# occurrence adds the error, and '-k' for headers that we wish to manually
+# include, such as a project's public API, for which occurrences after the first
+# are removed.
+#
+# Todo: the error handling could be better, which currently throws and halts
+# (which is functional just not very friendly).
+#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
import argparse, re, sys
# Compiled regex Patern to handle the following type of file includes:
#
-# #include "file"
-# #include "file"
-# # include "file"
-# #include "file"
-# #include "file" // comment
-# #include "file" // comment with quote "
+# #include "file"
+# #include "file"
+# # include "file"
+# #include "file"
+# #include "file" // comment
+# #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
-# #include <file>
-# //#include "file"
-# /*#include "file"*/
+# #include <file>
+# //#include "file"
+# /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# Simple tests to prove include_regex's cases.
#
def test_match_include() -> bool:
- if (include_regex.match('#include "file"') and
- include_regex.match(' #include "file"') and
- include_regex.match('# include "file"') and
- include_regex.match('#include "file"') and
- include_regex.match('#include "file" // comment')):
- if (not include_regex.match('#include <file>') and
- not include_regex.match('//#include "file"') and
- not include_regex.match('/*#include "file"*/')):
- found = include_regex.match('#include "file" // "')
- if (found and found.group(1) == 'file'):
- print('#include match valid')
- return True
- return False
+ if (include_regex.match('#include "file"') and
+ include_regex.match(' #include "file"') and
+ include_regex.match('# include "file"') and
+ include_regex.match('#include "file"') and
+ include_regex.match('#include "file" // comment')):
+ if (not include_regex.match('#include <file>') and
+ not include_regex.match('//#include "file"') and
+ not include_regex.match('/*#include "file"*/')):
+ found = include_regex.match('#include "file" // "')
+ if (found and found.group(1) == 'file'):
+ print('#include match valid')
+ return True
+ return False
# Compiled regex Patern to handle "#pragma once" in various formats:
#
-# #pragma once
-# #pragma once
-# # pragma once
-# #pragma once
-# #pragma once // comment
+# #pragma once
+# #pragma once
+# # pragma once
+# #pragma once
+# #pragma once // comment
#
# Ignoring commented versions, same as include_regex.
#
# Simple tests to prove pragma_regex's cases.
#
def text_match_pragma() -> bool:
- if (pragma_regex.match('#pragma once') and
- pragma_regex.match(' #pragma once') and
- pragma_regex.match('# pragma once') and
- pragma_regex.match('#pragma once') and
- pragma_regex.match('#pragma once // comment')):
- if (not pragma_regex.match('//#pragma once') and
- not pragma_regex.match('/*#pragma once*/')):
- print('#pragma once match valid')
- return True
- return False
+ if (pragma_regex.match('#pragma once') and
+ pragma_regex.match(' #pragma once') and
+ pragma_regex.match('# pragma once') and
+ pragma_regex.match('#pragma once') and
+ pragma_regex.match('#pragma once // comment')):
+ if (not pragma_regex.match('//#pragma once') and
+ not pragma_regex.match('/*#pragma once*/')):
+ print('#pragma once match valid')
+ return True
+ return False
# Finds 'file'. First the currently processing file's 'parent' path is looked at
# for a match, followed by the list of 'root' paths, returning a valid Path in
# canonical form. If no match is found None is returned.
#
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
- if (parent):
- found = parent.joinpath(file).resolve();
- else:
- found = Path(file)
- if (found.is_file()):
- return found
- for root in roots:
- found = root.joinpath(file).resolve()
- if (found.is_file()):
- return found
- return None
+ if (parent):
+ found = parent.joinpath(file).resolve();
+ else:
+ found = Path(file)
+ if (found.is_file()):
+ return found
+ for root in roots:
+ found = root.joinpath(file).resolve()
+ if (found.is_file()):
+ return found
+ return None
# Helper to resolve lists of files. 'file_list' is passed in from the arguments
# and each entry resolved to its canonical path (like any include entry, either
# from the list of root paths or the owning file's 'parent', which in this case
# is case is the input file). The results are stored in 'resolved'.
#
-def resolve_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
- if (file_list):
- for filename in file_list:
- found = resolve_include(filename, parent)
- if (found):
- resolved.add(found)
- else:
- error_line(f'Warning: excluded file not found: {filename}')
+def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
+ if (file_list):
+ for filename in file_list:
+ found = resolve_include(filename, parent)
+ if (found):
+ resolved.add(found)
+ else:
+ error_line(f'Warning: excluded file not found: {filename}')
# Writes 'line' to the open 'destn' (or stdout).
#
def write_line(line: str) -> None:
- print(line, file=destn)
+ print(line, file=destn)
# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def error_line(line: Any) -> None:
- print(line, file=sys.stderr)
+ print(line, file=sys.stderr)
# Inline the contents of 'file' (with any of its includes also inlined, etc.).
#
-def add_file(file: Path) -> None:
- if (file.is_file()):
- error_line(f'Processing: {file}')
- with file.open('r') as opened:
- for line in opened:
- line = line.rstrip('\n')
- match_include = include_regex.match(line);
- if (match_include):
- # We have a quoted include directive so grab the file
- inc_name = match_include.group(1)
- resolved = resolve_include(inc_name, file.parent)
- if (resolved):
- if (resolved in excludes):
- # The file was excluded so error if the source attempts to use it
- write_line(f'#error Using excluded file: {inc_name}')
- error_line(f'Excluding: {inc_name}')
- else:
- if (resolved not in found):
- # The file was not previously encountered
- found.add(resolved)
- if (resolved in keeps):
- # But the include was flagged to keep as included
- write_line(f'/**** *NOT* inlining {inc_name} ****/')
- write_line(line)
- error_line('Not Inlining: {inc_name}')
- else:
- # The file was neither excluded nor seen before so inline it
- write_line(f'/**** start inlining {inc_name} ****/')
- add_file(resolved)
- write_line(f'/**** ended inlining {inc_name} ****/')
- else:
- write_line(f'/**** skipping file: {inc_name} ****/')
- else:
- # The include file didn't resolve to a file
- write_line(f'#error Unable to find: {inc_name}')
- error_line(f'Error: Unable to find: {inc_name}')
- else:
- # Skip any 'pragma once' directives, otherwise write the source line
- if (keep_pragma or not pragma_regex.match(line)):
- write_line(line)
- else:
- error_line(f'Error: Invalid file: {file}')
+def add_file(file: Path, file_name: str = None) -> None:
+ if (file.is_file()):
+ if (not file_name):
+ file_name = file.name
+ error_line(f'Processing: {file_name}')
+ with file.open('r') as opened:
+ for line in opened:
+ line = line.rstrip('\n')
+ match_include = include_regex.match(line);
+ if (match_include):
+ # We have a quoted include directive so grab the file
+ inc_name = match_include.group(1)
+ resolved = resolve_include(inc_name, file.parent)
+ if (resolved):
+ if (resolved in excludes):
+ # The file was excluded so error if the compiler uses it
+ write_line(f'#error Using excluded file: {inc_name}')
+ error_line(f'Excluding: {inc_name}')
+ else:
+ if (resolved not in found):
+ # The file was not previously encountered
+ found.add(resolved)
+ if (resolved in keeps):
+ # But the include was flagged to keep as included
+ write_line(f'/**** *NOT* inlining {inc_name} ****/')
+ write_line(line)
+ error_line(f'Not inlining: {inc_name}')
+ else:
+ # The file was neither excluded nor seen before so inline it
+ write_line(f'/**** start inlining {inc_name} ****/')
+ add_file(resolved, inc_name)
+ write_line(f'/**** ended inlining {inc_name} ****/')
+ else:
+ write_line(f'/**** skipping file: {inc_name} ****/')
+ else:
+ # The include file didn't resolve to a file
+ write_line(f'#error Unable to find: {inc_name}')
+ error_line(f'Error: Unable to find: {inc_name}')
+ else:
+ # Skip any 'pragma once' directives, otherwise write the source line
+ if (keep_pragma or not pragma_regex.match(line)):
+ write_line(line)
+ else:
+ error_line(f'Error: Invalid file: {file}')
# Start here
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
# Resolve all of the root paths upfront (we'll halt here on invalid roots)
if (args.root):
- for path in args.root:
- roots.add(path.resolve(strict=True))
+ for path in args.root:
+ roots.add(path.resolve(strict=True))
# The remaining params: so resolve the excluded files and #pragma once directive
-resolve_files(args.exclude, excludes, args.input.parent)
-resolve_files(args.keep, keeps, args.input.parent)
+resolve_excluded_files(args.exclude, excludes, args.input.parent)
+resolve_excluded_files(args.keep, keeps, args.input.parent)
keep_pragma = args.pragma;
# Then recursively process the input file
try:
- if (args.output):
- destn = args.output
- add_file(args.input)
+ if (args.output):
+ destn = args.output
+ add_file(args.input)
finally:
- if (not destn):
- destn.close()
+ if (not destn):
+ destn.close()