271 lines
10 KiB
Python
271 lines
10 KiB
Python
#!/usr/bin/env python2
|
|
#
|
|
# Combine a set of source files into a single C file.
|
|
#
|
|
# Overview of the process:
|
|
#
|
|
# * Parse user supplied C files. Add automatic #undefs at the end
|
|
# of each C file to avoid defines bleeding from one file to another.
|
|
#
|
|
# * Combine the C files in specified order. If sources have ordering
|
|
# dependencies (depends on application), order may matter.
|
|
#
|
|
# * Process #include statements in the combined source, categorizing
|
|
# them either as "internal" (found in specified include path) or
|
|
# "external". Internal includes, unless explicitly excluded, are
|
|
# inlined into the result while external includes are left as is.
|
|
# Duplicate internal #include statements are replaced with a comment.
|
|
#
|
|
# At every step, source and header lines are represented with explicit
|
|
# line objects which keep track of original filename and line. The
|
|
# output contains #line directives, if requested, to ensure error
|
|
# throwing and other diagnostic info will work in a useful manner when
|
|
# deployed. It's also possible to generate a combined source with no
|
|
# #line directives.
|
|
#
|
|
# Making the process deterministic is important, so that if users have
|
|
# diffs that they apply to the combined source, such diffs would apply
|
|
# for as long as possible.
|
|
#
|
|
# Limitations and notes:
|
|
#
|
|
# * While there are automatic #undef's for #define's introduced in each
|
|
# C file, it's not possible to "undefine" structs, unions, etc. If
|
|
# there are structs/unions/typedefs with conflicting names, these
|
|
# have to be resolved in the source files first.
|
|
#
|
|
# * Because duplicate #include statements are suppressed, currently
|
|
# assumes #include statements are not conditional.
|
|
#
|
|
# * A system header might be #include'd in multiple source files with
|
|
# different feature defines (like _BSD_SOURCE). Because the #include
|
|
# file will only appear once in the resulting source, the first
|
|
# occurrence wins. The result may not work correctly if the feature
|
|
# defines must actually be different between two or more source files.
|
|
#
|
|
|
|
import logging
|
|
import sys
|
|
# Root logging setup: records go to stdout in a fixed-width
# "<logger name> <level> <message>" layout.
logging.basicConfig(
    format='%(name)-21s %(levelname)-7s %(message)s',
    stream=sys.stdout,
    level=logging.INFO
)

# Logger used throughout this script. It starts at INFO; main() changes
# the level when --quiet or --verbose is given.
logger = logging.getLogger('combine_src.py')
logger.setLevel(logging.INFO)
|
|
|
|
import os
|
|
import re
|
|
import json
|
|
import optparse
|
|
import logging
|
|
|
|
# Directories searched, in order, when resolving an #include; a header
# found in one of them is amalgamated into the combined source.
# Filled in by main() from --include-path.
include_paths = list()

# Include names that are always left in the output as plain #include
# lines, even if they could be found in include_paths.
# Filled in by main() from --include-exclude.
include_excluded = list()
|
|
|
|
class File:
    """An input file held in memory.

    Attributes:
      filename_full -- the path exactly as passed to the constructor
      filename      -- last component of that path
      lines         -- the 'lines' object passed to the constructor
    """

    # Class-level placeholders; __init__ sets all three on every instance.
    lines = None
    filename = None
    filename_full = None

    def __init__(self, filename, lines):
        self.filename_full = filename
        self.filename = os.path.split(filename)[1]
        self.lines = lines
|
|
|
|
class Line:
    """One line of text together with where it came from.

    Attributes:
      filename_full -- the path exactly as passed to the constructor
      filename      -- last component of that path
      lineno        -- line number passed to the constructor
      data          -- line content passed to the constructor
    """

    # Class-level placeholders; __init__ sets all four on every instance.
    data = None
    lineno = None
    filename = None
    filename_full = None

    def __init__(self, filename, lineno, data):
        self.filename_full = filename
        self.filename = os.path.split(filename)[1]
        self.lineno = lineno
        self.data = data
|
|
|
|
def readFile(filename):
    """Read 'filename' and return it as a File made of Line objects.

    The file is opened in binary mode. Every line read becomes a Line
    carrying the given filename, a line number counted from 1, and the
    line content with a single trailing newline character removed (any
    other trailing characters are kept).
    """
    collected = []

    with open(filename, 'rb') as handle:
        for number, text in enumerate(handle, 1):
            # Drop at most one trailing newline; the slice is simply empty
            # (and compares unequal) for an empty string.
            if text[-1:] == '\n':
                text = text[:-1]
            collected.append(Line(filename, number, text))

    return File(filename, collected)
|
|
|
|
def lookupInclude(incfn):
    """Resolve an include name against the global include_paths list.

    'incfn' is the name as written in the #include directive; both '/' and
    '\\' are accepted as path separators. The directories in include_paths
    are tried in order.

    Returns the joined path of the first candidate that exists on disk, or
    None if no directory contains the file.
    """
    # Split into components so that os.path.join() can rebuild the path
    # with the separator of the host platform.
    inccomp = re.split(r'/|\\', incfn)

    for path in include_paths:
        # Argument unpacking replaces the deprecated apply() built-in.
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # first match wins

    return None
|
|
|
|
def addAutomaticUndefs(f):
    """Append #undef lines for macros that 'f' appears to leave defined.

    Scans f.lines for '#define NAME' and '#undef NAME' directives that
    start at the very beginning of a line and tracks which names are still
    defined at the end of the file. If any remain, an empty line, an
    '/* automatic undefs */' marker and one '#undef NAME' per name (sorted,
    so the output is deterministic) are appended to f.lines in place.

    This is not a 100% accurate process: conditional compilation is not
    tracked, so an #undef behind an #if counts as unconditional. That is
    harmless because it is safe to #undef something that is not defined.

    Returns None; 'f' is modified in place.
    """
    re_def = re.compile(r'#define\s+(\w+).*$')
    re_undef = re.compile(r'#undef\s+(\w+).*$')

    defined = set()
    for line in f.lines:
        m = re_def.match(line.data)
        if m is not None:
            defined.add(m.group(1))
        m = re_undef.match(line.data)
        if m is not None:
            # Could just ignore #undef's here: we'd then emit reliable
            # #undef's (though maybe duplicates) at the end.
            defined.discard(m.group(1))

    if not defined:
        return

    def append(data):
        # Use the full path so the synthesized Line keeps the same
        # filename_full as the lines that were read from the file.
        f.lines.append(Line(f.filename_full, len(f.lines) + 1, data))

    append('')
    append('/* automatic undefs */')
    for name in sorted(defined):  # deterministic order
        logger.debug('automatic #undef for ' + name)
        append('#undef %s' % name)
|
|
|
|
def createCombined(files, prologue_filename, line_directives):
    """Combine 'files' into a single source text.

    Arguments:
      files             -- File objects, processed in the given order
      prologue_filename -- file whose content is placed at the start of
                           the output, or None for no prologue
      line_directives   -- if true, emit a '#line N "file"' directive
                           whenever the original file/line position jumps

    #include lines are handled as follows:
      * names listed in include_excluded are kept as is, every time;
      * a name that was already seen is replaced with a comment;
      * a name found by lookupInclude() is replaced with a comment
        followed by the (recursively processed) content of that file;
      * anything else is kept as is.

    Returns a tuple (combined_source, metadata). combined_source is the
    output text terminated by a newline. metadata is a dict whose
    'line_map' entry lists, for every jump in original position, the
    original file, original line and the 1-based combined line.

    Raises Exception for a line that starts with '#include' but cannot be
    parsed.
    """
    res = []
    line_map = []  # indicate combined source lines where uncombined file/line would change
    metadata = {
        'line_map': line_map
    }

    # Position the next original line must have for the output to remain
    # contiguous. A list is used so the nested functions can update it.
    emit_state = [ None, None ]  # curr_filename, curr_lineno

    def emitText(text):
        # Emit a synthesized line which stands in for one original line.
        res.append(text)
        # Nothing has been emitted from any file yet when the very first
        # output line is synthesized; there is no position to advance
        # (incrementing None would raise TypeError).
        if emit_state[1] is not None:
            emit_state[1] += 1

    def emitLine(line):
        # Emit an original Line, recording a position jump if needed.
        if line.filename != emit_state[0] or line.lineno != emit_state[1]:
            if line_directives:
                res.append('#line %d "%s"' % (line.lineno, line.filename))
            line_map.append({ 'original_file': line.filename,
                              'original_line': line.lineno,
                              'combined_line': len(res) + 1 })
        res.append(line.data)
        emit_state[0] = line.filename
        emit_state[1] = line.lineno + 1

    included = set()  # headers already included

    if prologue_filename is not None:
        with open(prologue_filename, 'rb') as f:
            res.extend(f.read().split('\n'))

    re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')

    # Process a file, appending it to the result; the input may be a
    # source or an include file.  #include directives are handled
    # recursively.
    def processFile(f):
        logger.debug('Process file: ' + f.filename)

        for line in f.lines:
            if not line.data.startswith('#include'):
                emitLine(line)
                continue

            m = re_inc.match(line.data)
            if m is None:
                raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
            incpath = m.group(2)

            if incpath in include_excluded:
                # Specific include files excluded from the
                # inlining / duplicate suppression process.
                emitLine(line)  # keep as is
                continue

            if incpath in included:
                # We suppress duplicate includes, both internal and
                # external, based on the assumption that includes are
                # not behind #if defined() checks.
                emitText('/* #include %s -> already included */' % incpath)
                continue
            included.add(incpath)

            # An include file is considered "internal" and is amalgamated
            # if it is found in the include path provided by the user.
            incfile = lookupInclude(incpath)
            if incfile is not None:
                logger.debug('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
                emitText('/* #include %s */' % incpath)
                processFile(readFile(incfile))
            else:
                logger.debug('Include considered external: %s' % repr(line.data))
                emitLine(line)  # keep as is

    for f in files:
        processFile(f)

    return '\n'.join(res) + '\n', metadata
|
|
|
|
def main():
    """Command line entry point.

    Parses the options, reads every source file named on the command line
    (adding automatic #undefs to each), combines them with
    createCombined(), and writes the combined source and the JSON metadata
    to the paths given with --output-source and --output-metadata.

    Exits through optparse's error handling (usage message, non-zero
    status) if either output option is missing.
    """
    global include_paths, include_excluded

    parser = optparse.OptionParser()
    parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
    parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
    parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
    parser.add_option('--output-source', dest='output_source', help='Output source filename')
    parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
    parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
    parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
    parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
    (opts, args) = parser.parse_args()

    # Validate explicitly: assert statements are removed under "python -O",
    # which would let a missing option through to open().
    if not opts.output_source:
        parser.error('--output-source is required')
    if not opts.output_metadata:
        parser.error('--output-metadata is required')

    include_paths = opts.include_paths  # global for easy access
    include_excluded = opts.include_excluded

    # Log level.
    if opts.quiet:
        logger.setLevel(logging.WARNING)
    elif opts.verbose:
        logger.setLevel(logging.DEBUG)

    # Read input files, add automatic #undefs
    files = []
    for fn in args:
        res = readFile(fn)
        logger.debug('Add automatic undefs for: ' + fn)
        addAutomaticUndefs(res)
        files.append(res)

    combined_source, metadata = \
        createCombined(files, opts.prologue, opts.line_directives)
    with open(opts.output_source, 'wb') as f:
        f.write(combined_source)
    with open(opts.output_metadata, 'wb') as f:
        f.write(json.dumps(metadata, indent=4))

    logger.info('Combined %d source files, %d bytes written to %s' % (len(files), len(combined_source), opts.output_source))


if __name__ == '__main__':
    main()
|