tools: add an amalgamate tool and description of usage (#22034)

2025-09-09 15:27:05 -04:00 · 2024-08-12 01:51:08 -06:00 · 2024-08-12 01:51:08 -06:00 · ac3045b472
commit ac3045b472
parent 95ff9340ea
2 changed files with 270 additions and 6 deletions
--- a/cmd/tools/amalgamate.v
+++ b/cmd/tools/amalgamate.v
@ -0,0 +1,221 @@
 // amalgamate multiple C source files into a single
 // C source file.  See https://sqlite.org/amalgamation.html
 // for a description of file amalgamation.
 //
 // If an input file is not specified, source is read
 // from stdin.
 //
 // If an output file is not specified, source is output
 // to stdout.
 module main
 import flag
 import os
 import regex
 // passed to the flag parser as the application name
 const app_name = 'amalgamate'
 // passed to the flag parser as the application version
 const app_version = '0.0.1'
 // pre-compile the include statement regex.  It matches a line that
 // begins (after optional whitespace) with `#include "file"` and
 // captures the quoted file name in its single group.  Lines of the
 // form `#include <file>` do not match.
 const re = regex.regex_opt(r'^\s*#\s*include\s*"([^"]+)"')!
 // Config holds the settings gathered from the command line by
 // parse_arguments.
 struct Config {
 mut:
 	// remaining (non-flag) arguments: the files to amalgamate, in order.
 	// When empty, source is read from stdin.
 	input_files []string
 	// value of -o/--output; an empty string means write to stdout
 	output_file string
 	// values of -s/--search_path, tried in order when locating a file
 	search_dirs []string
 	// values of -b/--blacklist: file names that are never inlined
 	blacklist   []string
 }
 // Context carries the configuration together with the state that is
 // accumulated while amalgamating.
 struct Context {
 	config Config
 mut:
 	// resolved paths of files whose contents were already emitted, plus
 	// the names of blacklisted includes already seen; consulted so that
 	// nothing is emitted twice
 	processed_files []string
 }
 // parse_arguments builds the flag parser, registers the program
 // description, usage footer and the -o/-b/-s options, and returns the
 // resulting Config.
 //
 // The arguments left over after flag parsing become the input files.
 // If the parser reports an error, the error and the usage text are
 // printed to stderr and the program exits with status 1.
 fn parse_arguments() Config {
 	mut cfg := Config{}
 	mut parser := flag.new_flag_parser(os.args)
 	parser.skip_executable()
 	parser.application(app_name)
 	parser.version(app_version)
 	parser.arguments_description('[file ...]')
 	parser.description('combine multiple .c and .h files into one.')
 	parser.description('')
 	parser.description('Combine input, coming from either stdin or input files, into one')
 	parser.description('large file.  Include statements are processed and the contents')
 	parser.description('copied in place.  Only #include "file.h" statements cause their')
 	parser.description('contents to be copied, not #include <file.h> statements.  If no')
 	parser.description('input files are specified, read from stdin.')
 	parser.footer('\nAn example showing multiple blacklisted files and multiple search')
 	parser.footer('directories.')
 	parser.footer('')
 	parser.footer('    amalgamate -o output_file.c -b ignore_me.h \\')
 	parser.footer('        -b ignore_me_2.h -b other/ignore_me.h \\')
 	parser.footer('        -s relative/search/dir -s /absolute/search/dir \\')
 	parser.footer('        file1.c file2.c')
 	parser.footer('')
 	// flag.space indents continuation lines of the multi-line option help
 	cfg.output_file = parser.string('output', `o`, '', 'output file.  If not specified,\n' +
 		flag.space + 'defaults to stdout.\n', val_desc: '<filename>')
 	cfg.blacklist = parser.string_multi('blacklist', `b`,
 		'blacklist a file name.  This prevents\n' + flag.space +
 		'the named file from being included.\n' + flag.space +
 		'This can be specified more than once.\n', val_desc: '<include_file>')
 	cfg.search_dirs = parser.string_multi('search_path', `s`,
 		'add a directory to the search path.\n' + flag.space +
 		'An include file is searched for in\n' + flag.space +
 		'the current working directory and\n' + flag.space +
 		'if not found, the directories in this\n' + flag.space +
 		'list are searched, in order, until the\n' + flag.space +
 		'file is found or the search list is\n' + flag.space +
 		'exhausted.  This can be specified\n' + flag.space + 'more than once.\n',
 		val_desc: '<search_dir>'
 	)
 	cfg.input_files = parser.finalize() or {
 		// this only reports the first unrecognized argument
 		eprintln('${err}\n')
 		eprintln('${parser.usage()}\n')
 		exit(1)
 	}
 	return cfg
 }
 // main parses the command line and runs the amalgamation.  Any error
 // is printed to stderr with an `error: ` prefix and the program exits
 // with status 1.
 fn main() {
 	mut ctx := Context{
 		config: parse_arguments()
 	}
 	ctx.amalgamate() or {
 		eprintln('error: ${err}')
 		exit(1)
 	}
 }
 // amalgamate produces the combined source and writes it out.
 //
 // With no input files, stdin is read and its includes are resolved
 // relative to the current working directory.  Otherwise each input
 // file is processed in order, skipping blacklisted names and files
 // that were already processed.  The result goes to the configured
 // output file, or to stdout when no output file was given.
 //
 // Errors from locating, reading or writing files are propagated.
 fn (mut c Context) amalgamate() ! {
 	mut pieces := []string{}
 	if c.config.input_files.len > 0 {
 		for input_name in c.config.input_files {
 			// blacklisted inputs are never read
 			if input_name in c.config.blacklist {
 				continue
 			}
 			resolved := c.find_file(input_name)!
 			if resolved !in c.processed_files {
 				// record the file before expanding it so that it is
 				// not pulled in again by one of its own includes
 				c.processed_files << resolved
 				contents := os.read_file(resolved)!
 				pieces << c.handle_includes(os.dir(resolved), contents)!
 			}
 		}
 	} else {
 		// no input files: take the source from stdin
 		cwd := os.getwd()
 		pieces << c.handle_includes(cwd, os.get_raw_lines_joined())!
 	}
 	combined := pieces.join('')
 	if c.config.output_file != '' {
 		os.write_file(c.config.output_file, combined)!
 	} else {
 		print(combined)
 	}
 }
 // find_file resolves `file` to the real path of an existing regular
 // file.  The name is tried as given first, and then joined onto each
 // configured search directory in order.  An error naming the file is
 // returned when no candidate exists.
 fn (c Context) find_file(file string) !string {
 	direct := os.real_path(file)
 	if os.is_file(direct) {
 		return direct
 	}
 	for search_dir in c.config.search_dirs {
 		candidate := os.real_path(os.join_path_single(search_dir, file))
 		if !os.is_file(candidate) {
 			continue
 		}
 		return candidate
 	}
 	return error('file "${file}" not found')
 }
 // handle_includes returns `input_source` with every `#include "file"`
 // line replaced by the (recursively expanded) contents of that file.
 //
 // Only quoted includes are matched; `#include <file>` lines for system
 // headers are copied through like any other line.
 //
 // A blacklisted include is kept as-is the first time it is seen and is
 // replaced by '\n' afterwards.  Other includes are looked up relative
 // to `local_dir` first (unless the name is an absolute path) and then
 // by name alone via find_file; a file that was already processed is
 // dropped.  Errors from locating or reading a file are propagated.
 fn (mut c Context) handle_includes(local_dir string, input_source string) !string {
 	mut result := []string{}
 	for text in input_source.split_into_lines() {
 		match_start, _ := re.match_string(text)
 		if match_start < 0 {
 			// not a local include: copy the line through unchanged
 			result << text
 			continue
 		}
 		// the first capture group holds the quoted file name
 		include_name := text[re.groups[0]..re.groups[1]]
 		if include_name in c.config.blacklist {
 			// leave blacklisted files alone
 			if include_name !in c.processed_files {
 				result << text
 				c.processed_files << include_name
 			} else {
 				// we don't want a second include
 				result << '\n'
 			}
 			continue
 		}
 		mut include_path := ''
 		if !os.is_abs_path(include_name) {
 			// prefer a file next to the one being processed; on
 			// failure fall through to the plain lookup below
 			include_path = c.find_file(os.join_path_single(local_dir, include_name)) or { '' }
 		}
 		if include_path == '' {
 			include_path = c.find_file(include_name)!
 		}
 		if include_path in c.processed_files {
 			// skip over files already read
 			continue
 		}
 		c.processed_files << include_path
 		included_source := os.read_file(include_path)!
 		result << c.handle_includes(os.dir(include_path), included_source)!
 	}
 	return result.join_lines() + '\n'
 }
--- a/thirdparty/libgc/amalgamation.txt
+++ b/thirdparty/libgc/amalgamation.txt
@ -1,7 +1,50 @@
 The libgc source is distributed here as an amalgamation (https://sqlite.org/amalgamation.html).
-This means that, rather than mirroring the entire bdwgc repo here,
+This means that, rather than mirroring the entire bdwgc repo here, the amalgamate tool
-[this script](https://gist.github.com/spaceface777/34d25420f2dc4953fb7864f44a211105) was used
+was used to bundle all C files and local includes together into a single C file, which is
-to bundle all local includes together into a single C file, which is much easier to handle.
+much easier to handle.  This helps keep the V source distribution small, can reduce compile
-Furthermore, the script above was also used to minify (i.e. remove comments and whitespace in)
+times by 3%-15%, and can help C compilers generate more optimized code.
-the garbage collector source. Together, these details help keep the V source distribution small,
+
-can reduce compile times by 3%-15%, and can help C compilers generate more optimized code.
+For generating the libgc amalgamation, the following commands were used:
    git clone https://github.com/ivmai/bdwgc.git
    cd bdwgc
    ./autogen.sh
    ./configure --enable-threads=pthreads \
        --enable-static \
        --enable-shared=no \
        --enable-thread-local-alloc=no \
        --enable-parallel-mark \
        --enable-single-obj-compilation \
        --enable-gc-debug
    ../../../cmd/tools/amalgamate -o ../gc.c \
        -b atomic_ops.h \
        -b gc/gc.h \
        -b gc/gc_backptr.h \
        -b gc/gc_disclaim.h \
        -b gc/gc_gcj.h \
        -b gc/gc_inline.h \
        -b gc/gc_mark.h \
        -b gc/gc_pthread_redirects.h \
        -b gc/gc_tiny_fl.h \
        -b gc/gc_typed.h \
        -b gc/javaxfc.h \
        -b il/PCR_IL.h \
        -b mm/PCR_MM.h \
        -b psp2-support.h \
        -b stubinfo.h \
        -b th/PCR_ThCtl.h \
        -b vd/PCR_VD.h \
        -s include \
        -s include/private \
        extra/gc.c
 The updated header files are then copied into the include/gc directory.  The copied
 include/gc/gc_cpp.h can be deleted, since V does not need that header.  Finally, the
 bdwgc git checkout itself can be removed.
    cp include/gc/*.h ../include/gc
    cd ..
    rm include/gc/gc_cpp.h
    rm -rf bdwgc