From ac3045b472750f991a0e28c9b369b898d016cfbc Mon Sep 17 00:00:00 2001
From: Kim Shrier
Date: Mon, 12 Aug 2024 01:51:08 -0600
Subject: [PATCH] tools: add an amalgamate tool and description of usage (#22034)

---
 cmd/tools/amalgamate.v            | 221 ++++++++++++++++++++++++++++++
 thirdparty/libgc/amalgamation.txt |  55 +++++++-
 2 files changed, 270 insertions(+), 6 deletions(-)
 create mode 100644 cmd/tools/amalgamate.v

diff --git a/cmd/tools/amalgamate.v b/cmd/tools/amalgamate.v
new file mode 100644
index 0000000000..d5e95b7670
--- /dev/null
+++ b/cmd/tools/amalgamate.v
@@ -0,0 +1,221 @@
+// amalgamate multiple C source files into a single
+// C source file. See https://sqlite.org/amalgamation.html
+// for a description of file amalgamation.
+//
+// If an input file is not specified, source is read
+// from stdin.
+//
+// If an output file is not specified, source is output
+// to stdout.
+
+module main
+
+import flag
+import os
+import regex
+
+const app_name = 'amalgamate'
+const app_version = '0.0.1'
+
+// pre-compile the include statement regex
+const re = regex.regex_opt(r'^\s*#\s*include\s*"([^"]+)"')!
+
+struct Config {
+mut:
+	input_files []string
+	output_file string
+	search_dirs []string
+	blacklist   []string
+}
+
+struct Context {
+	config Config
+mut:
+	processed_files []string
+}
+
+fn parse_arguments() Config {
+	mut cfg := Config{}
+
+	mut parser := flag.new_flag_parser(os.args)
+	parser.skip_executable()
+	parser.application(app_name)
+	parser.version(app_version)
+
+	parser.arguments_description('[file ...]')
+
+	parser.description('combine multiple .c and .h files into one.')
+	parser.description('')
+	parser.description('Combine input, coming from either stdin or input files, into one')
+	parser.description('large file. Include statements are processed and the contents')
+	parser.description('copied in place. Only #include "file.h" statements cause their')
+	parser.description('contents to be copied, not #include <file.h> statements. If no')
+	parser.description('input files are specified, read from stdin.')
+
+	parser.footer('\nAn example showing multiple blacklisted files and multiple search')
+	parser.footer('directories.')
+	parser.footer('')
+	parser.footer(' amalgamate -o output_file.c -b ignore_me.h \\')
+	parser.footer(' -b ignore_me_2.h -b other/ignore_me.h \\')
+	parser.footer(' -s relative/search/dir -s /absolute/search/dir \\')
+	parser.footer(' file1.c file2.c')
+	parser.footer('')
+
+	cfg.output_file = parser.string('output', `o`, '', 'output file. If not specified,\n' +
+		flag.space + 'defaults to stdout.\n', val_desc: '')
+
+	cfg.blacklist = parser.string_multi('blacklist', `b`,
+		'blacklist a file name. This prevents\n' + flag.space +
+		'the named file from being included.\n' + flag.space +
+		'This can be specified more than once.\n', val_desc: '')
+
+	cfg.search_dirs = parser.string_multi('search_path', `s`,
+		'add a directory to the search path.\n' + flag.space +
+		'An include file is searched for in\n' + flag.space +
+		'the current working directory and\n' + flag.space +
+		'if not found, the directories in this\n' + flag.space +
+		'list are searched, in order, until the\n' + flag.space +
+		'file is found or the search list is\n' + flag.space +
+		'exhausted. This can be specified\n' + flag.space + 'more than once.\n',
+		val_desc: ''
+	)
+
+	cfg.input_files = parser.finalize() or {
+		// this only reports the first unrecognized argument
+		eprintln('${err}\n')
+		eprintln('${parser.usage()}\n')
+		exit(1)
+	}
+
+	return cfg
+}
+
+fn main() {
+	cfg := parse_arguments()
+
+	mut ctx := Context{
+		config: cfg
+	}
+
+	ctx.amalgamate() or {
+		eprintln('error: ${err}')
+		exit(1)
+	}
+}
+
+fn (mut c Context) amalgamate() ! {
+	mut source := ''
+
+	if c.config.input_files.len == 0 {
+		// source += '/* ########## stdin */\n'
+		// if there are no input files, read from stdin
+		local_dir := os.getwd()
+		source += c.handle_includes(local_dir, os.get_raw_lines_joined())!
+		// source += '/* ########## stdin end */\n'
+	} else {
+		// read each input file, in order, and
+		// handle all of its includes.
+		for file in c.config.input_files {
+			if file in c.config.blacklist {
+				// skip blacklisted files
+				continue
+			}
+
+			found_file := c.find_file(file)!
+
+			if found_file in c.processed_files {
+				// skip over files already read
+				continue
+			}
+
+			// source += '/* ########## ${file} */\n'
+			c.processed_files << found_file
+			local_dir := os.dir(found_file)
+			file_source_code := os.read_file(found_file)!
+			source += c.handle_includes(local_dir, file_source_code)!
+			// source += '/* ########## ${file} end */\n'
+		}
+	}
+
+	if c.config.output_file == '' {
+		print(source)
+	} else {
+		os.write_file(c.config.output_file, source)!
+	}
+
+	return
+}
+
+fn (c Context) find_file(file string) !string {
+	mut full_path := os.real_path(file)
+
+	if os.is_file(full_path) {
+		return full_path
+	}
+
+	for dir in c.config.search_dirs {
+		full_path = os.real_path(os.join_path_single(dir, file))
+
+		if os.is_file(full_path) {
+			return full_path
+		}
+	}
+
+	return error('file "${file}" not found')
+}
+
+// handle_includes looks for lines that start with #include
+// and inserts the lines from the named include file.
+//
+// The pattern matches file names for local header files,
+// not system header files as are denoted by < and >.
+fn (mut c Context) handle_includes(local_dir string, input_source string) !string {
+	source_lines := input_source.split_into_lines()
+	mut output_lines := []string{}
+
+	for line in source_lines {
+		start, _ := re.match_string(line)
+
+		if start >= 0 {
+			file := line[re.groups[0]..re.groups[1]]
+			mut found_file := ''
+
+			if file in c.config.blacklist {
+				// leave blacklisted files alone
+				if file in c.processed_files {
+					// we don't want a second include
+					output_lines << '\n'
+				} else {
+					output_lines << line
+					c.processed_files << file
+				}
+				continue
+			}
+
+			if !os.is_abs_path(file) {
+				found_file = c.find_file(os.join_path_single(local_dir, file)) or {
+					// keep looking
+					''
+				}
+			}
+
+			if found_file == '' {
+				found_file = c.find_file(file)!
+			}
+
+			if found_file in c.processed_files {
+				// skip over files already read
+				continue
+			}
+			c.processed_files << found_file
+			file_source_code := os.read_file(found_file)!
+			// output_lines << '/* ########## ${file} begin */\n'
+			output_lines << c.handle_includes(os.dir(found_file), file_source_code)!
+			// output_lines << '/* ########## ${file} end */\n'
+		} else {
+			output_lines << line
+		}
+	}
+
+	return output_lines.join_lines() + '\n'
+}
diff --git a/thirdparty/libgc/amalgamation.txt b/thirdparty/libgc/amalgamation.txt
index de25a3d367..4212281054 100644
--- a/thirdparty/libgc/amalgamation.txt
+++ b/thirdparty/libgc/amalgamation.txt
@@ -1,7 +1,50 @@
 The libgc source is distributed here as an amalgamation (https://sqlite.org/amalgamation.html).
-This means that, rather than mirroring the entire bdwgc repo here,
-[this script](https://gist.github.com/spaceface777/34d25420f2dc4953fb7864f44a211105) was used
-to bundle all local includes together into a single C file, which is much easier to handle.
-Furthermore, the script above was also used to minify (i.e. remove comments and whitespace in)
-the garbage collector source. Together, these details help keep the V source distribution small,
-can reduce compile times by 3%-15%, and can help C compilers generate more optimized code.
+This means that, rather than mirroring the entire bdwgc repo here, the amalgamate tool
+was used to bundle all C files and local includes together into a single C file, which is
+much easier to handle. This helps keep the V source distribution small, can reduce compile
+times by 3%-15%, and can help C compilers generate more optimized code.
+
+For generating the libgc amalgamation, the following commands were used:
+
+    git clone https://github.com/ivmai/bdwgc.git
+    cd bdwgc
+    ./autogen.sh
+    ./configure --enable-threads=pthreads \
+        --enable-static \
+        --enable-shared=no \
+        --enable-thread-local-alloc=no \
+        --enable-parallel-mark \
+        --enable-single-obj-compilation \
+        --enable-gc-debug
+
+    ../../../cmd/tools/amalgamate -o ../gc.c \
+        -b atomic_ops.h \
+        -b gc/gc.h \
+        -b gc/gc_backptr.h \
+        -b gc/gc_disclaim.h \
+        -b gc/gc_gcj.h \
+        -b gc/gc_inline.h \
+        -b gc/gc_mark.h \
+        -b gc/gc_pthread_redirects.h \
+        -b gc/gc_tiny_fl.h \
+        -b gc/gc_typed.h \
+        -b gc/javaxfc.h \
+        -b il/PCR_IL.h \
+        -b mm/PCR_MM.h \
+        -b psp2-support.h \
+        -b stubinfo.h \
+        -b th/PCR_ThCtl.h \
+        -b vd/PCR_VD.h \
+        -s include \
+        -s include/private \
+        extra/gc.c
+
+The updated header files are then copied into the include/gc directory. We can delete
+include/gc/gc_cpp.h since this header is not needed by V. And, we can remove the git
+repo for bdwgc.
+
+    cp include/gc/*.h ../include/gc
+    cd ..
+    rm include/gc/gc_cpp.h
+    rm -rf bdwgc
+