mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 08:49:29 -04:00
Window step size defaults depending on compression level
This commit is contained in:
parent
c322650738
commit
592c7376ce
@ -284,6 +284,8 @@ class block_manager_ final : public block_manager::impl {
|
|||||||
, block_size_{static_cast<size_t>(1) << cfg.block_size_bits}
|
, block_size_{static_cast<size_t>(1) << cfg.block_size_bits}
|
||||||
, filter_{bloom_filter_size()} {
|
, filter_{bloom_filter_size()} {
|
||||||
if (segmentation_enabled()) {
|
if (segmentation_enabled()) {
|
||||||
|
LOG_INFO << "using a " << size_with_unit(window_size_) << " window at "
|
||||||
|
<< size_with_unit(window_step_) << " steps for segment analysis";
|
||||||
LOG_INFO << "bloom filter size: " << size_with_unit(filter_.size() / 8);
|
LOG_INFO << "bloom filter size: " << size_with_unit(filter_.size() / 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -219,6 +219,7 @@ struct level_defaults {
|
|||||||
char const* schema_compression;
|
char const* schema_compression;
|
||||||
char const* metadata_compression;
|
char const* metadata_compression;
|
||||||
unsigned window_size;
|
unsigned window_size;
|
||||||
|
unsigned window_step;
|
||||||
char const* order;
|
char const* order;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -308,16 +309,16 @@ struct level_defaults {
|
|||||||
|
|
||||||
constexpr std::array<level_defaults, 10> levels{{
|
constexpr std::array<level_defaults, 10> levels{{
|
||||||
// clang-format off
|
// clang-format off
|
||||||
/* 0 */ {20, "null", "null" , "null", 0, "none"},
|
/* 0 */ {20, "null", "null" , "null", 0, 0, "none"},
|
||||||
/* 1 */ {20, ALG_DATA_1, ALG_SCHEMA, "null", 0, "path"},
|
/* 1 */ {20, ALG_DATA_1, ALG_SCHEMA, "null", 0, 0, "path"},
|
||||||
/* 2 */ {20, ALG_DATA_2, ALG_SCHEMA, "null", 0, "path"},
|
/* 2 */ {20, ALG_DATA_2, ALG_SCHEMA, "null", 0, 0, "path"},
|
||||||
/* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, "similarity"},
|
/* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, 1, "similarity"},
|
||||||
/* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, "similarity"},
|
/* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, 2, "similarity"},
|
||||||
/* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, "similarity"},
|
/* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, 2, "similarity"},
|
||||||
/* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, "nilsimsa"},
|
/* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, 3, "nilsimsa"},
|
||||||
/* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, "nilsimsa"},
|
/* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, 3, "nilsimsa"},
|
||||||
/* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, "nilsimsa"},
|
/* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
|
||||||
/* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, "nilsimsa"},
|
/* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
|
||||||
// clang-format on
|
// clang-format on
|
||||||
}};
|
}};
|
||||||
|
|
||||||
@ -377,8 +378,7 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
po::value<unsigned>(&cfg.blockhash_window_size),
|
po::value<unsigned>(&cfg.blockhash_window_size),
|
||||||
"window sizes for block hashing")
|
"window sizes for block hashing")
|
||||||
("window-step",
|
("window-step",
|
||||||
po::value<unsigned>(&cfg.window_increment_shift)
|
po::value<unsigned>(&cfg.window_increment_shift),
|
||||||
->default_value(1),
|
|
||||||
"window step (as right shift of size)")
|
"window step (as right shift of size)")
|
||||||
("bloom-filter-size",
|
("bloom-filter-size",
|
||||||
po::value<unsigned>(&cfg.bloom_filter_size)->default_value(5),
|
po::value<unsigned>(&cfg.bloom_filter_size)->default_value(5),
|
||||||
@ -482,7 +482,7 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
l_or = std::max(l_or, ::strlen(l.order));
|
l_or = std::max(l_or, ::strlen(l.order));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string sep(28 + l_dc + l_sc + l_mc + l_or, '-');
|
std::string sep(30 + l_dc + l_sc + l_mc + l_or, '-');
|
||||||
|
|
||||||
std::cout << "mkdwarfs (" << PRJ_GIT_ID << ")\n\n" << opts << std::endl;
|
std::cout << "mkdwarfs (" << PRJ_GIT_ID << ")\n\n" << opts << std::endl;
|
||||||
std::cout << "Compression level defaults:\n"
|
std::cout << "Compression level defaults:\n"
|
||||||
@ -492,17 +492,17 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
"Window")
|
"Window")
|
||||||
<< fmt::format(" Size {:{}s} {:{}s} {:{}s} {:6s}\n",
|
<< fmt::format(" Size {:{}s} {:{}s} {:{}s} {:6s}\n",
|
||||||
"Block Data", l_dc, "Schema", l_sc, "Metadata",
|
"Block Data", l_dc, "Schema", l_sc, "Metadata",
|
||||||
l_mc, "Size Order")
|
l_mc, "Size/Step Order")
|
||||||
<< " " << sep << std::endl;
|
<< " " << sep << std::endl;
|
||||||
|
|
||||||
int level = 0;
|
int level = 0;
|
||||||
for (auto const& l : levels) {
|
for (auto const& l : levels) {
|
||||||
std::cout << fmt::format(" {:1d} {:2d} {:{}s} {:{}s} {:{}s}"
|
std::cout << fmt::format(" {:1d} {:2d} {:{}s} {:{}s} {:{}s}"
|
||||||
" {:2d} {:{}s}",
|
" {:2d} / {:1d} {:{}s}",
|
||||||
level, l.block_size_bits, l.data_compression,
|
level, l.block_size_bits, l.data_compression,
|
||||||
l_dc, l.schema_compression, l_sc,
|
l_dc, l.schema_compression, l_sc,
|
||||||
l.metadata_compression, l_mc, l.window_size,
|
l.metadata_compression, l_mc, l.window_size,
|
||||||
l.order, l_or)
|
l.window_step, l.order, l_or)
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
++level;
|
++level;
|
||||||
}
|
}
|
||||||
@ -562,6 +562,10 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
cfg.blockhash_window_size = defaults.window_size;
|
cfg.blockhash_window_size = defaults.window_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!vm.count("window-step")) {
|
||||||
|
cfg.window_increment_shift = defaults.window_step;
|
||||||
|
}
|
||||||
|
|
||||||
if (!vm.count("order")) {
|
if (!vm.count("order")) {
|
||||||
order = defaults.order;
|
order = defaults.order;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user