nilsimsa2 -> nilsimsa

This commit is contained in:
Marcus Holland-Moritz 2023-08-12 20:37:16 +02:00
parent 2546cc94f4
commit c42d168726
6 changed files with 28 additions and 28 deletions

View File

@@ -98,13 +98,13 @@ struct filesystem_writer_options {
}; };
// TODO: rename // TODO: rename
enum class file_order_mode { NONE, PATH, SCRIPT, SIMILARITY, NILSIMSA2 }; enum class file_order_mode { NONE, PATH, SCRIPT, SIMILARITY, NILSIMSA };
// TODO: rename // TODO: rename
struct file_order_options { struct file_order_options {
file_order_mode mode{file_order_mode::NONE}; file_order_mode mode{file_order_mode::NONE};
int nilsimsa2_max_children{8192}; int nilsimsa_max_children{8192};
int nilsimsa2_max_cluster_size{8192}; int nilsimsa_max_cluster_size{8192};
}; };
struct inode_options { struct inode_options {

View File

@@ -40,7 +40,7 @@ const std::map<std::string_view, file_order_mode> order_choices{
{"script", file_order_mode::SCRIPT}, {"script", file_order_mode::SCRIPT},
#endif #endif
{"similarity", file_order_mode::SIMILARITY}, {"similarity", file_order_mode::SIMILARITY},
{"nilsimsa2", file_order_mode::NILSIMSA2}, {"nilsimsa", file_order_mode::NILSIMSA},
}; };
void parse_order_option(std::string_view ordname, std::string_view opt, void parse_order_option(std::string_view ordname, std::string_view opt,
@@ -99,18 +99,18 @@ file_order_options fragment_order_parser::parse(std::string_view arg) const {
auto ordname = order_opts[0]; auto ordname = order_opts[0];
switch (rv.mode) { switch (rv.mode) {
case file_order_mode::NILSIMSA2: case file_order_mode::NILSIMSA:
if (order_opts.size() > 4) { if (order_opts.size() > 4) {
throw std::runtime_error(fmt::format( throw std::runtime_error(fmt::format(
"too many options for inode order mode '{}'", ordname)); "too many options for inode order mode '{}'", ordname));
} }
parse_order_option(ordname, order_opts[1], rv.nilsimsa2_max_children, parse_order_option(ordname, order_opts[1], rv.nilsimsa_max_children,
"max_children", 0); "max_children", 0);
if (order_opts.size() > 2) { if (order_opts.size() > 2) {
parse_order_option(ordname, order_opts[2], parse_order_option(ordname, order_opts[2],
rv.nilsimsa2_max_cluster_size, "max_cluster_size", rv.nilsimsa_max_cluster_size, "max_cluster_size",
0); 0);
} }
break; break;
@@ -142,10 +142,10 @@ fragment_order_parser::to_string(file_order_options const& opts) const {
case file_order_mode::SIMILARITY: case file_order_mode::SIMILARITY:
return "similarity"; return "similarity";
case file_order_mode::NILSIMSA2: case file_order_mode::NILSIMSA:
return fmt::format("nilsimsa2 (max_children={}, max_cluster_size={})", return fmt::format("nilsimsa (max_children={}, max_cluster_size={})",
opts.nilsimsa2_max_children, opts.nilsimsa_max_children,
opts.nilsimsa2_max_cluster_size); opts.nilsimsa_max_cluster_size);
} }
return "<unknown>"; return "<unknown>";
} }

View File

@@ -345,7 +345,7 @@ class inode_ : public inode {
case file_order_mode::SIMILARITY: case file_order_mode::SIMILARITY:
sc.try_emplace(f.category()); sc.try_emplace(f.category());
break; break;
case file_order_mode::NILSIMSA2: case file_order_mode::NILSIMSA:
nc.try_emplace(f.category()); nc.try_emplace(f.category());
break; break;
} }
@@ -407,7 +407,7 @@ class inode_ : public inode {
similarity_.emplace<uint32_t>(sc.finalize()); similarity_.emplace<uint32_t>(sc.finalize());
} break; } break;
case file_order_mode::NILSIMSA2: { case file_order_mode::NILSIMSA: {
nilsimsa nc; nilsimsa nc;
scan_range(mm, 0, mm->size(), nc); scan_range(mm, 0, mm->size(), nc);
// TODO: can we finalize in-place? // TODO: can we finalize in-place?
@@ -543,7 +543,7 @@ class inode_manager_ final : public inode_manager::impl {
return opts.fragment_order.any_is([](auto const& order) { return opts.fragment_order.any_is([](auto const& order) {
return order.mode == file_order_mode::SIMILARITY || return order.mode == file_order_mode::SIMILARITY ||
order.mode == file_order_mode::NILSIMSA2; order.mode == file_order_mode::NILSIMSA;
}); });
} }
@@ -568,7 +568,7 @@ class inode_manager_ final : public inode_manager::impl {
void presort_index(std::vector<std::shared_ptr<inode>>& inodes, void presort_index(std::vector<std::shared_ptr<inode>>& inodes,
std::vector<uint32_t>& index); std::vector<uint32_t>& index);
void order_inodes_by_nilsimsa2(worker_group& wg); void order_inodes_by_nilsimsa(worker_group& wg);
LOG_PROXY_DECL(LoggerPolicy); LOG_PROXY_DECL(LoggerPolicy);
std::vector<std::shared_ptr<inode>> inodes_; std::vector<std::shared_ptr<inode>> inodes_;
@@ -645,11 +645,11 @@ void inode_manager_<LoggerPolicy>::order_inodes(
break; break;
} }
case file_order_mode::NILSIMSA2: { case file_order_mode::NILSIMSA: {
LOG_INFO << "ordering " << count() LOG_INFO << "ordering " << count()
<< " inodes using new nilsimsa similarity..."; << " inodes using new nilsimsa similarity...";
auto ti = LOG_CPU_TIMED_INFO; auto ti = LOG_CPU_TIMED_INFO;
order_inodes_by_nilsimsa2(wg); order_inodes_by_nilsimsa(wg);
ti << count() << " inodes ordered"; ti << count() << " inodes ordered";
break; break;
} }
@@ -703,11 +703,11 @@ void inode_manager_<LoggerPolicy>::presort_index(
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void inode_manager_<LoggerPolicy>::order_inodes_by_nilsimsa2(worker_group& wg) { void inode_manager_<LoggerPolicy>::order_inodes_by_nilsimsa(worker_group& wg) {
auto const& file_order = opts_.fragment_order.get(); // TODO auto const& file_order = opts_.fragment_order.get(); // TODO
similarity_ordering_options opts; similarity_ordering_options opts;
opts.max_children = file_order.nilsimsa2_max_children; opts.max_children = file_order.nilsimsa_max_children;
opts.max_cluster_size = file_order.nilsimsa2_max_cluster_size; opts.max_cluster_size = file_order.nilsimsa_max_cluster_size;
auto span = sortable_span(); auto span = sortable_span();
span.all(); span.all();

View File

@@ -45,8 +45,8 @@ std::ostream& operator<<(std::ostream& os, file_order_mode mode) {
case file_order_mode::SIMILARITY: case file_order_mode::SIMILARITY:
modestr = "similarity"; modestr = "similarity";
break; break;
case file_order_mode::NILSIMSA2: case file_order_mode::NILSIMSA:
modestr = "nilsimsa2"; modestr = "nilsimsa";
break; break;
default: default:
break; break;

View File

@@ -258,10 +258,10 @@ constexpr std::array<level_defaults, 10> levels{{
/* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, 1, "similarity"}, /* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, 1, "similarity"},
/* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, 2, "similarity"}, /* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, 2, "similarity"},
/* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, 2, "similarity"}, /* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, 2, "similarity"},
/* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, 3, "nilsimsa2"}, /* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, 3, "nilsimsa"},
/* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, 3, "nilsimsa2"}, /* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, 3, "nilsimsa"},
/* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa2"}, /* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
/* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa2"}, /* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
// clang-format on // clang-format on
}}; }};

View File

@@ -157,7 +157,7 @@ void basic_end_to_end_test(std::string const& compressor,
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage)); auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
bool similarity = file_order == file_order_mode::SIMILARITY || bool similarity = file_order == file_order_mode::SIMILARITY ||
file_order == file_order_mode::NILSIMSA2; file_order == file_order_mode::NILSIMSA;
size_t const num_fail_empty = access_fail ? 1 : 0; size_t const num_fail_empty = access_fail ? 1 : 0;
@@ -598,7 +598,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28), ::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28),
::testing::Values(file_order_mode::NONE, file_order_mode::PATH, ::testing::Values(file_order_mode::NONE, file_order_mode::PATH,
file_order_mode::SCRIPT, file_order_mode::NILSIMSA2, file_order_mode::SCRIPT, file_order_mode::NILSIMSA,
file_order_mode::SIMILARITY), file_order_mode::SIMILARITY),
::testing::Values(std::nullopt, "xxh3-128"))); ::testing::Values(std::nullopt, "xxh3-128")));