mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 11:59:48 -04:00
Add reverse path ordering
This commit is contained in:
parent
1e80009d45
commit
7be9aa7585
@ -251,14 +251,18 @@ Most other options are concerned with compression tuning:
|
||||
"normalize" the permissions across the file system; this is equivalent to
|
||||
using `--chmod=ug-st,=Xr`.
|
||||
|
||||
- `--order=none`|`path`|`similarity`|`nilsimsa`[`:`*max-children*[`:`*max-cluster-size*]]:
|
||||
- `--order=none`|`path`|`revpath`|`similarity`|`nilsimsa`[`:`*max-children*[`:`*max-cluster-size*]]:
|
||||
The order in which inodes will be written to the file system. With `none`,
|
||||
the inodes will be stored in the order in which they are discovered. With
|
||||
`path`, they will be sorted asciibetically by path name of the first file
|
||||
representing this inode. With `similarity`, they will be ordered using a
|
||||
simple, yet fast and efficient, similarity hash function. `nilsimsa` ordering
|
||||
uses a more sophisticated similarity function that is typically better than
|
||||
`similarity`, but it's significantly slower to determine a good ordering.
|
||||
representing this inode. With `revpath`, they will also be ordered by path
|
||||
name, but the path is traversed from the leaf to the root, i.e. files
|
||||
with the same name will be sorted next to each other. With `similarity`, they
|
||||
will be ordered using a simple, yet fast and efficient, similarity hash
|
||||
function.
|
||||
`nilsimsa` ordering uses a more sophisticated similarity function that is
|
||||
typically better than `similarity`, but can be significantly slower to
|
||||
determine a good ordering.
|
||||
However, the new implementation of this algorithm can be parallelized and
|
||||
will perform much better on huge numbers of files. `nilsimsa` ordering can
|
||||
be tweaked by specifying a *max-children* and *max-cluster-size*. Both options
|
||||
|
@ -44,6 +44,10 @@ class inode_ordering {
|
||||
|
||||
void by_path(sortable_inode_span& sp) const { impl_->by_path(sp); }
|
||||
|
||||
// Forwards to the implementation object's by_reverse_path(), passing `sp`
// through unchanged.
void by_reverse_path(sortable_inode_span& sp) const {
|
||||
impl_->by_reverse_path(sp);
|
||||
}
|
||||
|
||||
void
|
||||
by_similarity(sortable_inode_span& sp,
|
||||
std::optional<fragment_category> cat = std::nullopt) const {
|
||||
@ -62,6 +66,7 @@ class inode_ordering {
|
||||
|
||||
virtual void by_inode_number(sortable_inode_span& sp) const = 0;
|
||||
virtual void by_path(sortable_inode_span& sp) const = 0;
|
||||
virtual void by_reverse_path(sortable_inode_span& sp) const = 0;
|
||||
virtual void by_similarity(sortable_inode_span& sp,
|
||||
std::optional<fragment_category> cat) const = 0;
|
||||
virtual void
|
||||
|
@ -78,7 +78,7 @@ struct filesystem_writer_options {
|
||||
};
|
||||
|
||||
// TODO: rename
|
||||
enum class file_order_mode { NONE, PATH, SIMILARITY, NILSIMSA };
|
||||
enum class file_order_mode { NONE, PATH, REVPATH, SIMILARITY, NILSIMSA };
|
||||
|
||||
// TODO: rename
|
||||
struct file_order_options {
|
||||
|
@ -36,6 +36,7 @@ namespace {
|
||||
// Lookup table from the textual order mode name to its file_order_mode
// enumerator.
const std::map<std::string_view, file_order_mode> order_choices{
|
||||
{"none", file_order_mode::NONE},
|
||||
{"path", file_order_mode::PATH},
|
||||
{"revpath", file_order_mode::REVPATH},
|
||||
{"similarity", file_order_mode::SIMILARITY},
|
||||
{"nilsimsa", file_order_mode::NILSIMSA},
|
||||
};
|
||||
@ -133,6 +134,9 @@ fragment_order_parser::to_string(file_order_options const& opts) const {
|
||||
case file_order_mode::PATH:
|
||||
return "path";
|
||||
|
||||
case file_order_mode::REVPATH:
|
||||
return "revpath";
|
||||
|
||||
case file_order_mode::SIMILARITY:
|
||||
return "similarity";
|
||||
|
||||
|
@ -336,6 +336,7 @@ class inode_ : public inode {
|
||||
switch (opts.fragment_order.get(f.category()).mode) {
|
||||
case file_order_mode::NONE:
|
||||
case file_order_mode::PATH:
|
||||
case file_order_mode::REVPATH:
|
||||
break;
|
||||
case file_order_mode::SIMILARITY:
|
||||
sc.try_emplace(f.category());
|
||||
@ -391,6 +392,7 @@ class inode_ : public inode {
|
||||
switch (order_mode) {
|
||||
case file_order_mode::NONE:
|
||||
case file_order_mode::PATH:
|
||||
case file_order_mode::REVPATH:
|
||||
break;
|
||||
|
||||
case file_order_mode::SIMILARITY: {
|
||||
@ -616,6 +618,15 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
|
||||
break;
|
||||
}
|
||||
|
||||
case file_order_mode::REVPATH: {
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes by reverse path name...";
|
||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||
order.by_reverse_path(span);
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
|
||||
case file_order_mode::SIMILARITY: {
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes by similarity...";
|
||||
|
@ -42,6 +42,7 @@ class inode_ordering_ final : public inode_ordering::impl {
|
||||
|
||||
void by_inode_number(sortable_inode_span& sp) const override;
|
||||
void by_path(sortable_inode_span& sp) const override;
|
||||
void by_reverse_path(sortable_inode_span& sp) const override;
|
||||
void by_similarity(sortable_inode_span& sp,
|
||||
std::optional<fragment_category> cat) const override;
|
||||
void by_nilsimsa(worker_group& wg, similarity_ordering_options const& opts,
|
||||
@ -78,6 +79,17 @@ void inode_ordering_<LoggerPolicy>::by_path(sortable_inode_span& sp) const {
|
||||
[&](auto a, auto b) { return paths[a] < paths[b]; });
|
||||
}
|
||||
|
||||
// Orders the index of `sp` by reverse path.
//
// The index is sorted in place with std::sort. Each index entry is used as a
// position into the span's raw inode list, and two entries are compared by
// calling less_revpath() on the objects returned by any() for those inodes.
template <typename LoggerPolicy>
|
||||
void inode_ordering_<LoggerPolicy>::by_reverse_path(
|
||||
sortable_inode_span& sp) const {
|
||||
auto raw = sp.raw();
|
||||
auto& index = sp.index();
|
||||
|
||||
// NOTE(review): assumes any() never returns null for these inodes and that
// less_revpath() is a strict weak ordering, as std::sort requires — confirm.
std::sort(index.begin(), index.end(), [&](auto a, auto b) {
|
||||
return raw[a]->any()->less_revpath(*raw[b]->any());
|
||||
});
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void inode_ordering_<LoggerPolicy>::by_similarity(
|
||||
sortable_inode_span& sp, std::optional<fragment_category> cat) const {
|
||||
|
@ -604,13 +604,12 @@ TEST_P(packing_test, regression_empty_fs) {
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
dwarfs, compression_test,
|
||||
::testing::Combine(::testing::ValuesIn(compressions),
|
||||
::testing::Values(12, 15, 20, 28),
|
||||
::testing::Values(file_order_mode::NONE,
|
||||
file_order_mode::PATH,
|
||||
file_order_mode::NILSIMSA,
|
||||
file_order_mode::SIMILARITY),
|
||||
::testing::Values(std::nullopt, "xxh3-128")));
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28),
|
||||
::testing::Values(file_order_mode::NONE, file_order_mode::PATH,
|
||||
file_order_mode::REVPATH, file_order_mode::NILSIMSA,
|
||||
file_order_mode::SIMILARITY),
|
||||
::testing::Values(std::nullopt, "xxh3-128")));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
dwarfs, scanner_test,
|
||||
@ -806,6 +805,7 @@ TEST_P(file_scanner, inode_ordering) {
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
dwarfs, file_scanner,
|
||||
::testing::Combine(::testing::Values(file_order_mode::PATH,
|
||||
file_order_mode::REVPATH,
|
||||
file_order_mode::SIMILARITY),
|
||||
::testing::Values(std::nullopt, "xxh3-128")));
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user