
271 lines
10 KiB

#!/usr/bin/env python2
# Combine a set of source files into a single C file.
# Overview of the process:
# * Parse user supplied C files. Add automatic #undefs at the end
# of each C file to avoid defines bleeding from one file to another.
# * Combine the C files in specified order. If sources have ordering
# dependencies (depends on application), order may matter.
# * Process #include statements in the combined source, categorizing
# them either as "internal" (found in specified include path) or
# "external". Internal includes, unless explicitly excluded, are
# inlined into the result while external includes are left as is.
# Duplicate internal #include statements are replaced with a comment.
# At every step, source and header lines are represented with explicit
# line objects which keep track of original filename and line. The
# output contains #line directives, if requested, to ensure error
# throwing and other diagnostic info will work in a useful manner when
# deployed. It's also possible to generate a combined source with no
# #line directives.
# Making the process deterministic is important, so that if users have
# diffs that they apply to the combined source, such diffs would apply
# for as long as possible.
# Limitations and notes:
# * While there are automatic #undef's for #define's introduced in each
# C file, it's not possible to "undefine" structs, unions, etc. If
# there are structs/unions/typedefs with conflicting names, these
# have to be resolved in the source files first.
# * Because duplicate #include statements are suppressed, currently
# assumes #include statements are not conditional.
# * A system header might be #include'd in multiple source files with
# different feature defines (like _BSD_SOURCE). Because the #include
# file will only appear once in the resulting source, the first
# occurrence wins. The result may not work correctly if the feature
# defines must actually be different between two or more source files.
import logging
import sys
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='%(name)-21s %(levelname)-7s %(message)s')
logger = logging.getLogger('combine_src.py')
import os
import re
import json
import optparse
import logging
# Include path for finding include files which are amalgamated.
# Filled in by main() from the --include-path command line options.
include_paths = []
# Include files specifically excluded from being inlined.
# Filled in by main() from the --include-exclude command line options.
include_excluded = []
class File:
    """An input file: the path it was given as, its base name, and its lines."""

    # Class-level defaults; __init__ overwrites all three on every instance.
    filename_full = None
    filename = None
    lines = None

    def __init__(self, filename, lines):
        """Keep `filename` as given, derive its base name, and store `lines`."""
        self.lines = lines
        self.filename_full = filename
        # Last path component only (same result as os.path.basename).
        self.filename = os.path.split(filename)[1]
class Line:
    """One line of text together with the file and line number it came from."""

    # Class-level defaults; __init__ overwrites all four on every instance.
    filename_full = None
    filename = None
    lineno = None
    data = None

    def __init__(self, filename, lineno, data):
        """Record the origin (`filename`, `lineno`) and the text `data`."""
        self.data = data
        self.lineno = lineno
        self.filename_full = filename
        # Last path component only (same result as os.path.basename).
        self.filename = os.path.split(filename)[1]
def readFile(filename):
    """Read `filename` in binary mode and return it as a File of Line objects.

    Line numbers start at 1.  A single trailing newline character is removed
    from each line before it is stored.
    """
    collected = []
    with open(filename, 'rb') as handle:
        for number, raw in enumerate(handle, 1):
            # Slice instead of index so an empty string needs no length check.
            if raw[-1:] == '\n':
                raw = raw[:-1]
            collected.append(Line(filename, number, raw))
    return File(filename, collected)
def lookupInclude(incfn):
    """Look up include file `incfn` in the configured include paths.

    `incfn` is the path as written in an #include directive; both forward
    slash and backslash are accepted as separators.  The directories in the
    module-level `include_paths` list are tried in order.

    Returns the full path of the first existing match, or None when the
    file is not found under any include path.
    """
    # Split on either separator so the components can be re-joined with
    # the platform's own separator.
    inccomp = re.split(r'/|\\', incfn)
    for path in include_paths:
        # Argument unpacking replaces the deprecated apply() builtin.
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # Return full path to first match
    return None
def addAutomaticUndefs(f):
    """Append #undef lines to `f` for macros that still look defined.

    Scans `f.lines` for lines starting with #define / #undef and tracks
    which names remain defined at the end.  If any remain, an empty line,
    a marker comment and one '#undef NAME' line per name (sorted by name)
    are appended to `f.lines` in place.  Nothing is returned.
    """
    re_def = re.compile(r'#define\s+(\w+).*$')
    re_undef = re.compile(r'#undef\s+(\w+).*$')
    defined = set()

    for line in f.lines:
        m = re_def.match(line.data)
        if m is not None:
            defined.add(m.group(1))
        m = re_undef.match(line.data)
        if m is not None:
            # Could just ignore #undef's here: we'd then emit
            # reliable #undef's (though maybe duplicates) at
            # the end.
            # discard() is a no-op for names never seen as defined.
            defined.discard(m.group(1))

    # Undefine anything that seems to be left defined.  This is not a 100%
    # process because some #undef's might be conditional which we don't
    # track at the moment.  Note that it's safe to #undef something that's
    # not defined.
    keys = sorted(defined)  # deterministic order
    if keys:
        f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
        f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
        for k in keys:
            logger.debug('automatic #undef for %s', k)
            f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))
# Build the combined source from `files`.  Returns a 2-tuple: the emitted
# lines joined with '\n' plus a trailing newline, and a metadata dict whose
# 'line_map' entry lists where the original file/line changes in the output.
# NOTE(review): this listing has lost its indentation and appears to be
# missing some lines (e.g. the closing brace of `metadata` and several
# branch bodies); the comments below describe only what is visible.
def createCombined(files, prologue_filename, line_directives):
res = []
line_map = [] # indicate combined source lines where uncombined file/line would change
metadata = {
'line_map': line_map
emit_state = [ None, None ] # curr_filename, curr_lineno
# emit() accepts either a plain string or a line object with
# .filename / .lineno (see the isinstance check).
def emit(line):
if isinstance(line, (str, unicode)):
emit_state[1] += 1
# When the origin differs from the tracked state, optionally write a
# #line directive and record the switch in line_map.
if line.filename != emit_state[0] or line.lineno != emit_state[1]:
if line_directives:
res.append('#line %d "%s"' % (line.lineno, line.filename))
line_map.append({ 'original_file': line.filename,
'original_line': line.lineno,
'combined_line': len(res) + 1 })
emit_state[0] = line.filename
emit_state[1] = line.lineno + 1
included = {} # headers already included
# The prologue file, if given, is read whole and split on '\n'.
if prologue_filename is not None:
with open(prologue_filename, 'rb') as f:
for line in f.read().split('\n'):
# Matches an #include with <...> or "..." delimiters; group 2 is the
# include path used as the key below.
re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')
# Process a file, appending it to the result; the input may be a
# source or an include file. #include directives are handled
# recursively.
def processFile(f):
logger.debug('Process file: ' + f.filename)
for line in f.lines:
if not line.data.startswith('#include'):
m = re_inc.match(line.data)
if m is None:
raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
incpath = m.group(2)
if incpath in include_excluded:
# Specific include files excluded from the
# inlining / duplicate suppression process.
emit(line) # keep as is
if included.has_key(incpath):
# We suppress duplicate includes, both internal and
# external, based on the assumption that includes are
# not behind #if defined() checks. This is the case for
# Duktape (except for the include files excluded).
emit('/* #include %s -> already included */' % incpath)
included[incpath] = True
# An include file is considered "internal" and is amalgamated
# if it is found in the include path provided by the user.
incfile = lookupInclude(incpath)
if incfile is not None:
logger.debug('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
emit('/* #include %s */' % incpath)
logger.debug('Include considered external: %s' % repr(line.data))
emit(line) # keep as is
for f in files:
return '\n'.join(res) + '\n', metadata
# Command line entry point: parses options, reads the source files given as
# positional arguments, combines them with createCombined() and writes the
# metadata as JSON to --output-metadata.
# NOTE(review): this listing has lost its indentation and appears to be
# missing some lines (the bodies of the log-level branches, of the input
# loop and of the first output `with`); confirm against the full file.
def main():
global include_paths, include_excluded
parser = optparse.OptionParser()
parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
parser.add_option('--output-source', dest='output_source', help='Output source filename')
parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
(opts, args) = parser.parse_args()
assert(opts.include_paths is not None)
include_paths = opts.include_paths # global for easy access
include_excluded = opts.include_excluded
# Log level.
if opts.quiet:
elif opts.verbose:
# Read input files, add automatic #undefs
# Positional arguments are the source file names, processed in the order given.
sources = args
files = []
for fn in sources:
res = readFile(fn)
logger.debug('Add automatic undefs for: ' + fn)
combined_source, metadata = \
createCombined(files, opts.prologue, opts.line_directives)
with open(opts.output_source, 'wb') as f:
# Metadata is serialized as JSON with a 4-space indent.
with open(opts.output_metadata, 'wb') as f:
f.write(json.dumps(metadata, indent=4))
logger.info('Combined %d source files, %d bytes written to %s' % (len(files), len(combined_source), opts.output_source))
if __name__ == '__main__':