From a14fa38a0de97173cf86476ded9d5bd7f7060305 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 6 Nov 2022 10:52:02 +0100 Subject: [PATCH] Reintroduce --num-scanner-workers --- doc/mkdwarfs.md | 17 ++++++++++++----- src/mkdwarfs.cpp | 13 ++++++++++--- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md index 2df9da4a..e4d4fd8b 100644 --- a/doc/mkdwarfs.md +++ b/doc/mkdwarfs.md @@ -66,16 +66,23 @@ Most other options are concerned with compression tuning: - `-N`, `--num-workers=`*value*: Number of worker threads used for building the filesystem. This defaults to the number of processors available on your system. Use this option if - you want to limit the resources used by `mkdwarfs`. - This option affects both the scanning phase and the compression phase. + you want to limit the resources used by `mkdwarfs` or to optimize build + speed. This option affects only the compression phase. + In the compression phase, the worker threads are used to compress the + individual filesystem blocks in the background. Ordering, segmenting + and block building are, again, single-threaded and run independently. + +- `--num-scanner-workers=`*value*: + Number of worker threads used for scanning files. By default, the same + value is used as for `--num-workers`, which in turn defaults to the number + of processors available on your system. Use this option if you want to + limit the resources used by `mkdwarfs` or to optimize build speed. This + option affects only the scanning phase. In the scanning phase, the worker threads are used to scan files in the background as they are discovered. File scanning includes checksumming for de-duplication as well as (optionally) checksumming for similarity computation, depending on the `--order` option. File discovery itself is single-threaded and runs independently from the scanning threads. 
- In the compression phase, the worker threads are used to compress the - individual filesystem blocks in the background. Ordering, segmenting - and block building are, again, single-threaded and run independently. - `-B`, `--max-lookback-blocks=`*value*: Specify how many of the most recent blocks to scan for duplicate segments. diff --git a/src/mkdwarfs.cpp b/src/mkdwarfs.cpp index 44b79fe8..70a14543 100644 --- a/src/mkdwarfs.cpp +++ b/src/mkdwarfs.cpp @@ -374,7 +374,7 @@ int mkdwarfs(int argc, char** argv) { time_resolution, order, progress_mode, recompress_opts, pack_metadata, file_hash_algo, debug_filter; std::vector filter; - size_t num_workers; + size_t num_workers, num_scanner_workers; bool no_progress = false, remove_header = false, no_section_index = false, force_overwrite = false; unsigned level; @@ -421,7 +421,10 @@ int mkdwarfs(int argc, char** argv) { "block size bits (size = 2^arg bits)") ("num-workers,N", po::value(&num_workers)->default_value(num_cpu), - "number of scanner/writer worker threads") + "number of writer (compression) worker threads") + ("num-scanner-workers", + po::value(&num_scanner_workers), + "number of scanner (hashing) worker threads") ("max-lookback-blocks,B", po::value(&cfg.max_active_blocks)->default_value(1), "how many blocks to scan for segments") @@ -715,8 +718,12 @@ int mkdwarfs(int argc, char** argv) { size_t mem_limit = parse_size_with_unit(memory_limit); + if (!vm.count("num-scanner-workers")) { + num_scanner_workers = num_workers; + } + worker_group wg_compress("compress", num_workers); - worker_group wg_scanner("scanner", num_workers); + worker_group wg_scanner("scanner", num_scanner_workers); if (vm.count("debug-filter")) { if (auto it = debug_filter_modes.find(debug_filter);