tools: add an amalgamate tool and description of usage (#22034)

This commit is contained in:
Kim Shrier 2024-08-12 01:51:08 -06:00 committed by GitHub
parent 95ff9340ea
commit ac3045b472
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 270 additions and 6 deletions

221
cmd/tools/amalgamate.v Normal file
View File

@ -0,0 +1,221 @@
// amalgamate multiple C source files into a single
// C source file. See https://sqlite.org/amalgamation.html
// for a description of file amalgamation.
//
// If an input file is not specified, source is read
// from stdin.
//
// If an output file is not specified, source is output
// to stdout.
module main
import flag
import os
import regex
// app_name is handed to the flag parser as the application name.
const app_name = 'amalgamate'
// app_version is handed to the flag parser as the application version.
const app_version = '0.0.1'
// pre-compile the include statement regex. It matches a line that starts,
// after optional whitespace, with `#include "..."` (whitespace is also
// allowed after the `#` and after `include`). The single capture group is
// the text between the double quotes. `#include <...>` lines do not match.
const re = regex.regex_opt(r'^\s*#\s*include\s*"([^"]+)"')!
// Config holds the settings collected from the command line.
struct Config {
mut:
	// input_files are the non-flag arguments, processed in order.
	// When empty, the source is read from stdin.
	input_files []string
	// output_file is the file to write; '' means print to stdout.
	output_file string
	// search_dirs are the directories tried, in order, when looking up a file.
	search_dirs []string
	// blacklist names the files whose contents are never copied in.
	blacklist []string
}
// Context carries the configuration plus the state accumulated while
// the input is being processed.
struct Context {
	// config is the read-only configuration for this run.
	config Config
mut:
	// processed_files records what has already been handled: resolved
	// paths of files that were read, and the names of blacklisted
	// includes that have already been emitted once.
	processed_files []string
}
// parse_arguments builds the Config for this run from os.args.
//
// It registers the application name, version, description and footer with
// the flag parser, then reads the `-o/--output`, `-b/--blacklist` and
// `-s/--search_path` options. Whatever remains after flag parsing becomes
// the list of input files.
//
// If the parser rejects the command line, the error and the usage text are
// printed to stderr and the process exits with status 1.
fn parse_arguments() Config {
	mut cfg := Config{}
	mut parser := flag.new_flag_parser(os.args)
	parser.skip_executable()
	parser.application(app_name)
	parser.version(app_version)
	parser.arguments_description('[file ...]')
	parser.description('combine multiple .c and .h files into one.')
	parser.description('')
	parser.description('Combine input, coming from either stdin or input files, into one')
	parser.description('large file. Include statements are processed and the contents')
	parser.description('copied in place. Only #include "file.h" statements cause their')
	parser.description('contents to be copied, not #include <file.h> statements. If no')
	parser.description('input files are specified, read from stdin.')
	parser.footer('\nAn example showing multiple blacklisted files and multiple search')
	parser.footer('directories.')
	parser.footer('')
	parser.footer(' amalgamate -o output_file.c -b ignore_me.h \\')
	parser.footer(' -b ignore_me_2.h -b other/ignore_me.h \\')
	parser.footer(' -s relative/search/dir -s /absolute/search/dir \\')
	parser.footer(' file1.c file2.c')
	parser.footer('')
	// flag.space indents the continuation lines of a multi-line flag description
	cfg.output_file = parser.string('output', `o`, '', 'output file. If not specified,\n' +
		flag.space + 'defaults to stdout.\n', val_desc: '<filename>')
	cfg.blacklist = parser.string_multi('blacklist', `b`,
		'blacklist a file name. This prevents\n' + flag.space +
		'the named file from being included.\n' + flag.space +
		'This can be specified more than once.\n', val_desc: '<include_file>')
	cfg.search_dirs = parser.string_multi('search_path', `s`,
		'add a directory to the search path.\n' + flag.space +
		'An include file is searched for in\n' + flag.space +
		'the current working directory and\n' + flag.space +
		'if not found, the directories in this\n' + flag.space +
		'list are searched, in order, until the\n' + flag.space +
		'file is found or the search list is\n' + flag.space +
		'exhausted. This can be specified\n' + flag.space + 'more than once.\n',
		val_desc: '<search_dir>'
	)
	cfg.input_files = parser.finalize() or {
		// this only reports the first unrecognized argument
		eprintln('${err}\n')
		eprintln('${parser.usage()}\n')
		exit(1)
	}
	return cfg
}
// main parses the command line and runs the amalgamation. Any error is
// reported on stderr and the process exits with status 1.
fn main() {
	mut ctx := Context{
		config: parse_arguments()
	}
	ctx.amalgamate() or {
		eprintln('error: ${err}')
		exit(1)
	}
}
// amalgamate produces the combined source and emits it.
//
// With input files, each one is located with find_file, read, and passed
// through handle_includes, in the order given. Blacklisted inputs and
// inputs whose resolved path was already processed are skipped. Without
// input files, the source is read from stdin and includes are resolved
// relative to the current working directory.
//
// The result is written to the configured output file, or printed to
// stdout when no output file is set. Lookup, read and write errors are
// propagated to the caller.
fn (mut c Context) amalgamate() ! {
	mut combined := ''
	if c.config.input_files.len > 0 {
		for input in c.config.input_files {
			if input in c.config.blacklist {
				// blacklisted inputs are never read
				continue
			}
			resolved := c.find_file(input)!
			if resolved in c.processed_files {
				// this file was already pulled in
				continue
			}
			c.processed_files << resolved
			contents := os.read_file(resolved)!
			// includes are resolved relative to the file's own directory first
			combined += c.handle_includes(os.dir(resolved), contents)!
		}
	} else {
		// no input files: take the source from stdin
		combined += c.handle_includes(os.getwd(), os.get_raw_lines_joined())!
	}
	if c.config.output_file != '' {
		os.write_file(c.config.output_file, combined)!
		return
	}
	print(combined)
}
// find_file returns the real path of the first existing regular file
// among: `file` itself, then `file` joined onto each configured search
// directory, in order. It returns an error when none of them exists.
fn (c Context) find_file(file string) !string {
	// the name as given is tried before any search directory
	mut candidates := [file]
	for dir in c.config.search_dirs {
		candidates << os.join_path_single(dir, file)
	}
	for candidate in candidates {
		resolved := os.real_path(candidate)
		if os.is_file(resolved) {
			return resolved
		}
	}
	return error('file "${file}" not found')
}
// handle_includes copies input_source line by line, replacing each
// `#include "file"` line with the (recursively processed) contents of
// the named file.
//
// Only the quoted form is recognised; `#include <file>` lines are
// copied through unchanged, like any other line.
//
// A relative include name is first looked up relative to local_dir and,
// if that fails, looked up as given through find_file. A blacklisted
// include is kept verbatim the first time it is seen and replaced by
// blank output on later occurrences. An include whose resolved path was
// already processed is dropped.
//
// Errors from the final lookup or from reading a file are propagated.
fn (mut c Context) handle_includes(local_dir string, input_source string) !string {
	mut output_lines := []string{}
	for line in input_source.split_into_lines() {
		start, _ := re.match_string(line)
		if start < 0 {
			// not a local include statement, copy it through
			output_lines << line
			continue
		}
		// the capture group bounds give the quoted file name
		name_begin := re.groups[0]
		name_end := re.groups[1]
		file := line[name_begin..name_end]
		if file in c.config.blacklist {
			// leave blacklisted files alone
			if file !in c.processed_files {
				output_lines << line
				c.processed_files << file
			} else {
				// we don't want a second include
				output_lines << '\n'
			}
			continue
		}
		mut resolved := ''
		if !os.is_abs_path(file) {
			// try next to the including file first
			resolved = c.find_file(os.join_path_single(local_dir, file)) or {
				// keep looking
				''
			}
		}
		if resolved == '' {
			resolved = c.find_file(file)!
		}
		if resolved in c.processed_files {
			// skip over files already read
			continue
		}
		c.processed_files << resolved
		included_source := os.read_file(resolved)!
		expanded := c.handle_includes(os.dir(resolved), included_source)!
		output_lines << expanded
	}
	return output_lines.join_lines() + '\n'
}

View File

@ -1,7 +1,50 @@
The libgc source is distributed here as an amalgamation (https://sqlite.org/amalgamation.html). The libgc source is distributed here as an amalgamation (https://sqlite.org/amalgamation.html).
This means that, rather than mirroring the entire bdwgc repo here, This means that, rather than mirroring the entire bdwgc repo here, the amalgamate tool
[this script](https://gist.github.com/spaceface777/34d25420f2dc4953fb7864f44a211105) was used was used to bundle all C files and local includes together into a single C file, which is
to bundle all local includes together into a single C file, which is much easier to handle. much easier to handle. This helps keep the V source distribution small, can reduce compile
Furthermore, the script above was also used to minify (i.e. remove comments and whitespace in) times by 3%-15%, and can help C compilers generate more optimized code.
the garbage collector source. Together, these details help keep the V source distribution small,
can reduce compile times by 3%-15%, and can help C compilers generate more optimized code. For generating the libgc amalgamation, the following commands were used:
git clone https://github.com/ivmai/bdwgc.git
cd bdwgc
./autogen.sh
./configure --enable-threads=pthreads \
--enable-static \
--enable-shared=no \
--enable-thread-local-alloc=no \
--enable-parallel-mark \
--enable-single-obj-compilation \
--enable-gc-debug
../../../cmd/tools/amalgamate -o ../gc.c \
-b atomic_ops.h \
-b gc/gc.h \
-b gc/gc_backptr.h \
-b gc/gc_disclaim.h \
-b gc/gc_gcj.h \
-b gc/gc_inline.h \
-b gc/gc_mark.h \
-b gc/gc_pthread_redirects.h \
-b gc/gc_tiny_fl.h \
-b gc/gc_typed.h \
-b gc/javaxfc.h \
-b il/PCR_IL.h \
-b mm/PCR_MM.h \
-b psp2-support.h \
-b stubinfo.h \
-b th/PCR_ThCtl.h \
-b vd/PCR_VD.h \
-s include \
-s include/private \
extra/gc.c
The updated header files are then copied into the include/gc directory. The
include/gc/gc_cpp.h header is not needed by V, so it can be deleted. Finally, the
cloned bdwgc git repo can be removed.
cp include/gc/*.h ../include/gc
cd ..
rm include/gc/gc_cpp.h
rm -rf bdwgc