mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
feat: add support for explicit inode ordering
This commit is contained in:
parent
c061a5253d
commit
1c7cbec1ee
@ -23,14 +23,22 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace dwarfs::writer {
|
||||
|
||||
// TODO: rename? -> inode_order_mode / fragment_order_mode
|
||||
enum class fragment_order_mode { NONE, PATH, REVPATH, SIMILARITY, NILSIMSA };
|
||||
// Selects how inodes/fragments are ordered. The textual names accepted by
// the option parser and printed by operator<< are the lowercase spellings
// (e.g. "revpath", "similarity", "nilsimsa", "explicit").
enum class fragment_order_mode {
  NONE,
  PATH,
  REVPATH,
  SIMILARITY,
  NILSIMSA, // tunable via nilsimsa_max_children / nilsimsa_max_cluster_size
  EXPLICIT, // order is read from a user-supplied file ("explicit:file=...")
};
|
||||
|
||||
// TODO: rename? -> inode_order_options / fragment_order_options
|
||||
struct fragment_order_options {
|
||||
static constexpr int const kDefaultNilsimsaMaxChildren{16384};
|
||||
static constexpr int const kDefaultNilsimsaMaxClusterSize{16384};
|
||||
@ -38,6 +46,8 @@ struct fragment_order_options {
|
||||
fragment_order_mode mode{fragment_order_mode::NONE};
|
||||
int nilsimsa_max_children{kDefaultNilsimsaMaxChildren};
|
||||
int nilsimsa_max_cluster_size{kDefaultNilsimsaMaxClusterSize};
|
||||
std::string explicit_order_file{};
|
||||
std::unordered_map<std::filesystem::path, size_t> explicit_order{};
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, fragment_order_mode mode);
|
||||
|
@ -24,6 +24,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
@ -77,7 +78,9 @@ class inode_manager {
|
||||
size_t total_size{0};
|
||||
};
|
||||
|
||||
inode_manager(logger& lgr, progress& prog, inode_options const& opts);
|
||||
inode_manager(logger& lgr, progress& prog,
|
||||
std::filesystem::path const& root_path,
|
||||
inode_options const& opts);
|
||||
|
||||
std::shared_ptr<inode> create_inode() { return impl_->create_inode(); }
|
||||
|
||||
|
@ -23,7 +23,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <dwarfs/writer/internal/inode.h>
|
||||
|
||||
@ -40,6 +42,7 @@ class worker_group;
|
||||
namespace writer {
|
||||
|
||||
struct inode_options;
|
||||
struct fragment_order_options;
|
||||
|
||||
namespace internal {
|
||||
|
||||
@ -71,6 +74,12 @@ class inode_ordering {
|
||||
impl_->by_nilsimsa(wg, opts, sp, cat);
|
||||
}
|
||||
|
||||
// Thin forwarder: hands the span, the root path and the ordering options
// straight to the implementation object without touching them.
void by_explicit_order(sortable_inode_span& sp,
                       std::filesystem::path const& root_path,
                       fragment_order_options const& opts) const {
  return impl_->by_explicit_order(sp, root_path, opts);
}
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
@ -84,6 +93,10 @@ class inode_ordering {
|
||||
by_nilsimsa(dwarfs::internal::worker_group& wg,
|
||||
similarity_ordering_options const& opts,
|
||||
sortable_inode_span& sp, fragment_category cat) const = 0;
|
||||
virtual void
|
||||
by_explicit_order(sortable_inode_span& sp,
|
||||
std::filesystem::path const& root_path,
|
||||
fragment_order_options const& opts) const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -50,6 +50,9 @@ std::ostream& operator<<(std::ostream& os, fragment_order_mode mode) {
|
||||
case fragment_order_mode::NILSIMSA:
|
||||
modestr = "nilsimsa";
|
||||
break;
|
||||
case fragment_order_mode::EXPLICIT:
|
||||
modestr = "explicit";
|
||||
break;
|
||||
}
|
||||
|
||||
return os << modestr;
|
||||
|
@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
@ -47,6 +48,7 @@ constexpr std::array order_choices{
|
||||
std::pair{"revpath"sv, fragment_order_mode::REVPATH},
|
||||
std::pair{"similarity"sv, fragment_order_mode::SIMILARITY},
|
||||
std::pair{"nilsimsa"sv, fragment_order_mode::NILSIMSA},
|
||||
std::pair{"explicit"sv, fragment_order_mode::EXPLICIT},
|
||||
};
|
||||
|
||||
} // namespace
|
||||
@ -95,6 +97,21 @@ fragment_order_parser::parse(std::string_view arg) const {
|
||||
}
|
||||
break;
|
||||
|
||||
case fragment_order_mode::EXPLICIT: {
|
||||
auto file = om.get<std::string>("file");
|
||||
std::ifstream ifs{file};
|
||||
if (!ifs) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("failed to open explicit order file '{}'", file));
|
||||
}
|
||||
std::string line;
|
||||
while (std::getline(ifs, line)) {
|
||||
auto const path = std::filesystem::path{line}.relative_path();
|
||||
rv.explicit_order[path] = rv.explicit_order.size();
|
||||
}
|
||||
rv.explicit_order_file = std::move(file);
|
||||
} break;
|
||||
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
fmt::format("inode order mode '{}' does not support options", algo));
|
||||
@ -125,6 +142,9 @@ fragment_order_parser::to_string(fragment_order_options const& opts) const {
|
||||
return fmt::format("nilsimsa:max_children={}:max_cluster_size={}",
|
||||
opts.nilsimsa_max_children,
|
||||
opts.nilsimsa_max_cluster_size);
|
||||
|
||||
case fragment_order_mode::EXPLICIT:
|
||||
return fmt::format("explicit:file={}", opts.explicit_order_file);
|
||||
}
|
||||
return "<unknown>";
|
||||
}
|
||||
|
@ -68,6 +68,7 @@
|
||||
namespace dwarfs::writer::internal {
|
||||
|
||||
using namespace dwarfs::internal;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
@ -430,6 +431,7 @@ class inode_ : public inode {
|
||||
case fragment_order_mode::NONE:
|
||||
case fragment_order_mode::PATH:
|
||||
case fragment_order_mode::REVPATH:
|
||||
case fragment_order_mode::EXPLICIT:
|
||||
break;
|
||||
case fragment_order_mode::SIMILARITY:
|
||||
sc.try_emplace(cat);
|
||||
@ -493,6 +495,7 @@ class inode_ : public inode {
|
||||
case fragment_order_mode::NONE:
|
||||
case fragment_order_mode::PATH:
|
||||
case fragment_order_mode::REVPATH:
|
||||
case fragment_order_mode::EXPLICIT:
|
||||
break;
|
||||
|
||||
case fragment_order_mode::SIMILARITY: {
|
||||
@ -545,9 +548,11 @@ class inode_ : public inode {
|
||||
template <typename LoggerPolicy>
|
||||
class inode_manager_ final : public inode_manager::impl {
|
||||
public:
|
||||
inode_manager_(logger& lgr, progress& prog, inode_options const& opts)
|
||||
inode_manager_(logger& lgr, progress& prog, fs::path const& root_path,
|
||||
inode_options const& opts)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, prog_(prog)
|
||||
, root_path_{root_path}
|
||||
, opts_{opts}
|
||||
, inodes_need_scanning_{inodes_need_scanning(opts_)} {}
|
||||
|
||||
@ -657,6 +662,7 @@ class inode_manager_ final : public inode_manager::impl {
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
std::vector<std::shared_ptr<inode>> inodes_;
|
||||
progress& prog_;
|
||||
fs::path const root_path_;
|
||||
inode_options opts_;
|
||||
bool const inodes_need_scanning_;
|
||||
std::atomic<size_t> mutable num_invalid_inodes_{0};
|
||||
@ -816,14 +822,25 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
|
||||
case fragment_order_mode::EXPLICIT: {
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes by explicit order...";
|
||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||
order.by_explicit_order(span, root_path_, opts);
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return span;
|
||||
}
|
||||
|
||||
inode_manager::inode_manager(logger& lgr, progress& prog,
|
||||
fs::path const& root_path,
|
||||
inode_options const& opts)
|
||||
: impl_(make_unique_logging_object<impl, internal::inode_manager_,
|
||||
logger_policies>(lgr, prog, opts)) {}
|
||||
logger_policies>(lgr, prog, root_path,
|
||||
opts)) {}
|
||||
|
||||
} // namespace dwarfs::writer::internal
|
||||
|
@ -36,6 +36,7 @@
|
||||
namespace dwarfs::writer::internal {
|
||||
|
||||
using namespace dwarfs::internal;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
@ -63,6 +64,8 @@ class inode_ordering_ final : public inode_ordering::impl {
|
||||
void
|
||||
by_nilsimsa(worker_group& wg, similarity_ordering_options const& opts,
|
||||
sortable_inode_span& sp, fragment_category cat) const override;
|
||||
void by_explicit_order(sortable_inode_span& sp, fs::path const& root_path,
|
||||
fragment_order_options const& opts) const override;
|
||||
|
||||
private:
|
||||
void
|
||||
@ -207,6 +210,46 @@ void inode_ordering_<LoggerPolicy>::by_nilsimsa_impl(
|
||||
future.get().swap(index);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void inode_ordering_<LoggerPolicy>::by_explicit_order(
|
||||
sortable_inode_span& sp, fs::path const& root_path,
|
||||
fragment_order_options const& opts) const {
|
||||
auto raw = sp.raw();
|
||||
auto& index = sp.index();
|
||||
auto const& order = opts.explicit_order;
|
||||
|
||||
if (order.empty()) {
|
||||
LOG_WARN << "empty explicit order file set";
|
||||
}
|
||||
|
||||
std::vector<std::filesystem::path> paths;
|
||||
std::vector<std::optional<size_t>> path_order;
|
||||
paths.resize(raw.size());
|
||||
path_order.resize(raw.size());
|
||||
|
||||
for (auto i : index) {
|
||||
paths[i] = fs::relative(raw[i]->any()->fs_path(), root_path);
|
||||
|
||||
if (auto it = order.find(paths[i]); it != order.end()) {
|
||||
path_order[i] = it->second;
|
||||
} else {
|
||||
LOG_DEBUG << "explicit order: " << paths[i]
|
||||
<< " not found in explicit order file";
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(index.begin(), index.end(), [&](auto a, auto b) {
|
||||
auto const& ai = path_order[a];
|
||||
auto const& bi = path_order[b];
|
||||
return ai.has_value() && bi.has_value() ? *ai < *bi
|
||||
: raw[a]->num() < raw[b]->num();
|
||||
});
|
||||
|
||||
for (auto i : index) {
|
||||
LOG_DEBUG << "explicit order: " << paths[i];
|
||||
}
|
||||
}
|
||||
|
||||
inode_ordering::inode_ordering(logger& lgr, progress& prog,
|
||||
inode_options const& opts)
|
||||
: impl_(make_unique_logging_object<impl, internal::inode_ordering_,
|
||||
|
@ -723,7 +723,7 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
prog.set_status_function(status_string);
|
||||
|
||||
inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
|
||||
inode_manager im(LOG_GET_LOGGER, prog, path, options_.inode);
|
||||
file_scanner fs(LOG_GET_LOGGER, wg_, os_, im, prog,
|
||||
{.hash_algo = options_.file_hash_algorithm,
|
||||
.debug_inode_create = os_.getenv(kEnvVarDumpFilesRaw) ||
|
||||
|
Loading…
x
Reference in New Issue
Block a user