From 63c9e9a3c3b6acab3926dadaf888c43e663258d3 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 7 Dec 2020 22:16:31 +0100 Subject: [PATCH] Add support for nilsimsa-based inode ordering Also refactor inode manager to take full control of ordering. Nilsimsa ordering is really slow, but has the advantage that inodes are added continuously while ordering. This allows us to perform the bulk of the ordering in the background while already running segmenting/compression. --- include/dwarfs/entry.h | 10 +- include/dwarfs/inode.h | 5 + include/dwarfs/inode_manager.h | 42 +++- include/dwarfs/options.h | 10 +- src/dwarfs/entry.cpp | 20 +- src/dwarfs/inode_manager.cpp | 353 +++++++++++++++++++++++++++------ src/dwarfs/options.cpp | 3 + src/dwarfs/scanner.cpp | 79 +++----- src/mkdwarfs.cpp | 13 +- test/dwarfs.cpp | 16 +- 10 files changed, 393 insertions(+), 158 deletions(-) diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h index 2607bfe5..ffd77935 100644 --- a/include/dwarfs/entry.h +++ b/include/dwarfs/entry.h @@ -109,9 +109,8 @@ class entry : public entry_interface { class file : public entry { public: file(const std::string& name, std::shared_ptr parent, - const struct ::stat& st, bool with_similarity) - : entry(name, parent, st) - , with_similarity_(with_similarity) {} + const struct ::stat& st) + : entry(name, parent, st) {} type_t type() const override; std::string_view hash() const; @@ -119,14 +118,11 @@ class file : public entry { std::shared_ptr get_inode() const; void accept(entry_visitor& v, bool preorder) override; uint32_t inode_num() const override; - uint32_t similarity_hash() const { return similarity_hash_; } void scan(os_access& os, progress& prog) override; private: using hash_type = std::array; - uint32_t similarity_hash_{0}; - const bool with_similarity_; hash_type hash_{0}; std::shared_ptr inode_; }; @@ -195,7 +191,7 @@ class device : public entry { class entry_factory { public: - static std::unique_ptr create(bool with_similarity = false); + static std::unique_ptr create(); virtual ~entry_factory() = default; diff --git a/include/dwarfs/inode.h b/include/dwarfs/inode.h index 8a157e39..812cbf57 100644 --- a/include/dwarfs/inode.h +++ b/include/dwarfs/inode.h @@ -35,15 +35,20 @@ struct chunk; } class file; +class os_access; + +struct inode_options; class inode : public object { public: using files_vector = folly::small_vector; virtual void set_files(files_vector&& fv) = 0; + virtual void scan(os_access& os, inode_options const& options) = 0; virtual void set_num(uint32_t num) = 0; virtual uint32_t num() const = 0; virtual uint32_t similarity_hash() const = 0; + virtual std::vector const& nilsimsa_similarity_hash() const = 0; virtual size_t size() const = 0; virtual file const* any() const = 0; virtual files_vector const& files() const = 0; diff --git a/include/dwarfs/inode_manager.h b/include/dwarfs/inode_manager.h index 42e83ca6..c6351a44 100644 --- a/include/dwarfs/inode_manager.h +++ b/include/dwarfs/inode_manager.h @@ -25,23 +25,45 @@ #include #include +#include "dwarfs/options.h" + namespace dwarfs { class inode; +class logger; class script; class inode_manager { public: - static std::unique_ptr create(); + using inode_cb = std::function const&)>; - virtual ~inode_manager() = default; - virtual std::shared_ptr create_inode() = 0; - virtual size_t count() const = 0; - virtual void order_inodes() = 0; - virtual void order_inodes(std::shared_ptr