Clean up command-line options, add --max-lookback-blocks

This commit is contained in:
Marcus Holland-Moritz 2021-03-03 01:51:40 +01:00
parent 0b0d5f4bd8
commit 425c9d68e9
4 changed files with 33 additions and 50 deletions

View File

@ -36,8 +36,7 @@ class progress;
class block_manager { class block_manager {
public: public:
struct config { struct config {
// TODO: remove vector and use single window size unsigned blockhash_window_size;
std::vector<size_t> blockhash_window_size;
unsigned window_increment_shift{1}; unsigned window_increment_shift{1};
size_t max_active_blocks{1}; size_t max_active_blocks{1};
size_t memory_limit{256 << 20}; size_t memory_limit{256 << 20};

View File

@ -162,9 +162,9 @@ class block_manager_ : public block_manager::impl {
, cfg_{cfg} , cfg_{cfg}
, os_{std::move(os)} , os_{std::move(os)}
, fsw_{fsw} , fsw_{fsw}
, window_size_{cfg.blockhash_window_size.empty() , window_size_{cfg.blockhash_window_size > 0
? 0 ? static_cast<size_t>(1) << cfg.blockhash_window_size
: cfg.blockhash_window_size.front()} : 0}
, window_step_{window_size_ >> cfg.window_increment_shift} , window_step_{window_size_ >> cfg.window_increment_shift}
, block_size_{static_cast<size_t>(1) << cfg.block_size_bits} {} , block_size_{static_cast<size_t>(1) << cfg.block_size_bits} {}

View File

@ -218,7 +218,7 @@ struct level_defaults {
char const* data_compression; char const* data_compression;
char const* schema_compression; char const* schema_compression;
char const* metadata_compression; char const* metadata_compression;
char const* window_sizes; unsigned window_size;
char const* order; char const* order;
}; };
@ -308,16 +308,16 @@ struct level_defaults {
constexpr std::array<level_defaults, 10> levels{{ constexpr std::array<level_defaults, 10> levels{{
// clang-format off // clang-format off
/* 0 */ {20, "null", "null" , "null", "-", "none"}, /* 0 */ {20, "null", "null" , "null", 0, "none"},
/* 1 */ {20, ALG_DATA_1, ALG_SCHEMA, "null", "-", "path"}, /* 1 */ {20, ALG_DATA_1, ALG_SCHEMA, "null", 0, "path"},
/* 2 */ {20, ALG_DATA_2, ALG_SCHEMA, "null", "-", "path"}, /* 2 */ {20, ALG_DATA_2, ALG_SCHEMA, "null", 0, "path"},
/* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", "13", "similarity"}, /* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, "similarity"},
/* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", "13", "similarity"}, /* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, "similarity"},
/* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", "15,13", "similarity"}, /* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, "similarity"},
/* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", "16,14,12", "nilsimsa"}, /* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, "nilsimsa"},
/* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, "16,14,12", "nilsimsa"}, /* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, "nilsimsa"},
/* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, "16,14,12", "nilsimsa"}, /* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, "nilsimsa"},
/* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, "16,14,12", "nilsimsa::50000"}, /* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, "nilsimsa"},
// clang-format on // clang-format on
}}; }};
@ -329,7 +329,7 @@ int mkdwarfs(int argc, char** argv) {
const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u); const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u);
block_manager::config cfg; block_manager::config cfg;
std::string path, output, window_sizes, memory_limit, script_arg, compression, std::string path, output, memory_limit, script_arg, compression,
schema_compression, metadata_compression, log_level_str, timestamp, schema_compression, metadata_compression, log_level_str, timestamp,
time_resolution, order, progress_mode, recompress_opts; time_resolution, order, progress_mode, recompress_opts;
size_t num_workers, max_scanner_workers; size_t num_workers, max_scanner_workers;
@ -408,13 +408,16 @@ int mkdwarfs(int argc, char** argv) {
po::value<std::string>(&script_arg), po::value<std::string>(&script_arg),
"Python script for customization") "Python script for customization")
#endif #endif
("blockhash-window-sizes", ("blockhash-window-size",
po::value<std::string>(&window_sizes), po::value<unsigned>(&cfg.blockhash_window_size),
"window sizes for block hashing") "window sizes for block hashing")
("window-increment-shift", ("window-increment-shift",
po::value<unsigned>(&cfg.window_increment_shift) po::value<unsigned>(&cfg.window_increment_shift)
->default_value(1), ->default_value(1),
"window increment (as right shift of size)") "window increment (as right shift of size)")
("max-lookback-blocks",
po::value<size_t>(&cfg.max_active_blocks)->default_value(1),
"how many blocks to scan for segments")
("remove-empty-dirs", ("remove-empty-dirs",
po::value<bool>(&options.remove_empty_dirs)->zero_tokens(), po::value<bool>(&options.remove_empty_dirs)->zero_tokens(),
"remove empty directories in file system") "remove empty directories in file system")
@ -459,36 +462,35 @@ int mkdwarfs(int argc, char** argv) {
} }
if (vm.count("help") or !vm.count("input") or !vm.count("output")) { if (vm.count("help") or !vm.count("input") or !vm.count("output")) {
size_t l_dc = 0, l_sc = 0, l_mc = 0, l_ws = 0, l_or = 0; size_t l_dc = 0, l_sc = 0, l_mc = 0, l_or = 0;
for (auto const& l : levels) { for (auto const& l : levels) {
l_dc = std::max(l_dc, ::strlen(l.data_compression)); l_dc = std::max(l_dc, ::strlen(l.data_compression));
l_sc = std::max(l_sc, ::strlen(l.schema_compression)); l_sc = std::max(l_sc, ::strlen(l.schema_compression));
l_mc = std::max(l_mc, ::strlen(l.metadata_compression)); l_mc = std::max(l_mc, ::strlen(l.metadata_compression));
l_ws = std::max(l_ws, ::strlen(l.window_sizes));
l_or = std::max(l_or, ::strlen(l.order)); l_or = std::max(l_or, ::strlen(l.order));
} }
std::string sep(22 + l_dc + l_sc + l_mc + l_ws + l_or, '-'); std::string sep(28 + l_dc + l_sc + l_mc + l_or, '-');
std::cout << "mkdwarfs (" << PRJ_GIT_ID << ")\n\n" << opts << std::endl; std::cout << "mkdwarfs (" << PRJ_GIT_ID << ")\n\n" << opts << std::endl;
std::cout << "Compression level defaults:\n" std::cout << "Compression level defaults:\n"
<< " " << sep << "\n" << " " << sep << "\n"
<< fmt::format(" Level Block {:{}s} {:{}s} Inode Order\n", << fmt::format(" Level Block {:{}s} {:s} Inode\n",
"Compression Algorithm", 4 + l_dc + l_sc + l_mc, "Compression Algorithm", 4 + l_dc + l_sc + l_mc,
"Window", l_ws) "Window")
<< fmt::format(" Size {:{}s} {:{}s} {:{}s} {:{}s}\n", << fmt::format(" Size {:{}s} {:{}s} {:{}s} {:6s}\n",
"Block Data", l_dc, "Schema", l_sc, "Metadata", "Block Data", l_dc, "Schema", l_sc, "Metadata",
l_mc, "Sizes", l_ws) l_mc, "Size Order")
<< " " << sep << std::endl; << " " << sep << std::endl;
int level = 0; int level = 0;
for (auto const& l : levels) { for (auto const& l : levels) {
std::cout << fmt::format(" {:1d} {:2d} {:{}s} {:{}s} {:{}s} " std::cout << fmt::format(" {:1d} {:2d} {:{}s} {:{}s} {:{}s} "
"{:{}s} {:{}s}", " {:2d} {:{}s}",
level, l.block_size_bits, l.data_compression, level, l.block_size_bits, l.data_compression,
l_dc, l.schema_compression, l_sc, l_dc, l.schema_compression, l_sc,
l.metadata_compression, l_mc, l.window_sizes, l.metadata_compression, l_mc, l.window_size,
l_ws, l.order, l_or) l.order, l_or)
<< std::endl; << std::endl;
++level; ++level;
} }
@ -544,8 +546,8 @@ int mkdwarfs(int argc, char** argv) {
metadata_compression = defaults.metadata_compression; metadata_compression = defaults.metadata_compression;
} }
if (!vm.count("blockhash-window-sizes")) { if (!vm.count("blockhash-window-size")) {
window_sizes = defaults.window_sizes; cfg.blockhash_window_size = defaults.window_size;
} }
if (!vm.count("order")) { if (!vm.count("order")) {
@ -620,24 +622,6 @@ int mkdwarfs(int argc, char** argv) {
size_t mem_limit = parse_size_with_unit(memory_limit); size_t mem_limit = parse_size_with_unit(memory_limit);
std::vector<std::string> wsv;
if (window_sizes != "-") {
boost::split(wsv, window_sizes, boost::is_any_of(","));
try {
std::transform(wsv.begin(), wsv.end(),
std::back_inserter(cfg.blockhash_window_size),
[](const std::string& x) {
return static_cast<size_t>(1) << folly::to<unsigned>(x);
});
} catch (folly::ConversionError const& e) {
std::cerr << "error: window size is not numeric (" << window_sizes << ")"
<< std::endl;
return 1;
}
}
worker_group wg_writer("writer", num_workers); worker_group wg_writer("writer", num_workers);
worker_group wg_scanner(worker_group::load_adaptive, "scanner", worker_group wg_scanner(worker_group::load_adaptive, "scanner",
max_scanner_workers); max_scanner_workers);

View File

@ -183,7 +183,7 @@ void basic_end_to_end_test(std::string const& compressor,
block_manager::config cfg; block_manager::config cfg;
scanner_options options; scanner_options options;
cfg.blockhash_window_size.push_back(1 << 10); cfg.blockhash_window_size = 10;
cfg.block_size_bits = block_size_bits; cfg.block_size_bits = block_size_bits;
options.file_order.mode = file_order; options.file_order.mode = file_order;