From b49dd782c673eb5f0c9da15cbf8817fd5aef948e Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 12 Aug 2023 10:44:40 +0200 Subject: [PATCH] add inode_ordering --- CMakeLists.txt | 1 + include/dwarfs/inode.h | 4 ++ include/dwarfs/inode_manager.h | 4 ++ include/dwarfs/inode_ordering.h | 54 +++++++++++++++ include/dwarfs/sortable_span.h | 119 ++++++++++++++++++++++++++++++++ src/dwarfs/inode_manager.cpp | 43 ++++-------- src/dwarfs/inode_ordering.cpp | 72 +++++++++++++++++++ 7 files changed, 268 insertions(+), 29 deletions(-) create mode 100644 include/dwarfs/inode_ordering.h create mode 100644 include/dwarfs/sortable_span.h create mode 100644 src/dwarfs/inode_ordering.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 87564129..f7d9914c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,6 +381,7 @@ list( src/dwarfs/inode_chunkable.cpp src/dwarfs/inode_fragments.cpp src/dwarfs/inode_manager.cpp + src/dwarfs/inode_ordering.cpp src/dwarfs/inode_reader_v2.cpp src/dwarfs/logger.cpp src/dwarfs/metadata_types.cpp diff --git a/include/dwarfs/inode.h b/include/dwarfs/inode.h index eb0b7d11..6683e6a6 100644 --- a/include/dwarfs/inode.h +++ b/include/dwarfs/inode.h @@ -30,6 +30,7 @@ #include "dwarfs/inode_fragments.h" #include "dwarfs/nilsimsa.h" #include "dwarfs/object.h" +#include "dwarfs/sortable_span.h" namespace dwarfs { @@ -62,4 +63,7 @@ class inode : public object { virtual void dump(std::ostream& os, inode_options const& options) const = 0; }; +using sortable_inode_span = + sortable_span const, uint32_t>; + } // namespace dwarfs diff --git a/include/dwarfs/inode_manager.h b/include/dwarfs/inode_manager.h index 849484da..cdfa9f87 100644 --- a/include/dwarfs/inode_manager.h +++ b/include/dwarfs/inode_manager.h @@ -30,6 +30,7 @@ #include #include "dwarfs/fragment_category.h" +#include "dwarfs/inode.h" namespace dwarfs { @@ -79,6 +80,8 @@ class inode_manager { void dump(std::ostream& os) const { impl_->dump(os); } + sortable_inode_span sortable_span() const { return impl_->sortable_span(); } + class impl { public: virtual ~impl() = default; @@ -96,6 +99,7 @@ class inode_manager { scan_background(worker_group& wg, os_access& os, std::shared_ptr ino, file const* p) const = 0; virtual void dump(std::ostream& os) const = 0; + virtual sortable_inode_span sortable_span() const = 0; }; private: diff --git a/include/dwarfs/inode_ordering.h b/include/dwarfs/inode_ordering.h new file mode 100644 index 00000000..1434fdb6 --- /dev/null +++ b/include/dwarfs/inode_ordering.h @@ -0,0 +1,54 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include + +#include "dwarfs/inode.h" + +namespace dwarfs { + +class logger; + +class inode_ordering { + public: + inode_ordering(logger& lgr); + + void by_inode_number(sortable_inode_span& sp) const { + impl_->by_inode_number(sp); + } + + void by_path(sortable_inode_span& sp) const { impl_->by_path(sp); } + + class impl { + public: + virtual ~impl() = default; + + virtual void by_inode_number(sortable_inode_span& sp) const = 0; + virtual void by_path(sortable_inode_span& sp) const = 0; + }; + + private: + std::unique_ptr impl_; +}; + +} // namespace dwarfs diff --git a/include/dwarfs/sortable_span.h b/include/dwarfs/sortable_span.h new file mode 100644 index 00000000..c9601641 --- /dev/null +++ b/include/dwarfs/sortable_span.h @@ -0,0 +1,119 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace dwarfs { + +template +class sortable_span { + public: + using value_type = T; + using index_value_type = IndexValueType; + + class iterator + : public boost::iterator_facade { + public: + using difference_type = typename boost::iterator_facade< + iterator, value_type, + boost::random_access_traversal_tag>::difference_type; + + iterator() = default; + iterator(iterator const& other) = default; + + private: + friend class boost::iterator_core_access; + friend class sortable_span; + + iterator(sortable_span const* vv, + typename std::vector::iterator it) + : vv_(vv) + , it_(it) {} + + bool equal(iterator const& other) const { + return vv_ == other.vv_ && it_ == other.it_; + } + + void increment() { ++it_; } + + void decrement() { --it_; } + + void advance(difference_type n) { it_ += n; } + + difference_type distance_to(iterator const& other) const { + return other.it_ - it_; + } + + value_type& dereference() const { return vv_->values_[*it_]; } + + sortable_span const* vv_{nullptr}; + typename std::vector::iterator it_; + }; + + explicit sortable_span(std::span values) + : values_{values} {} + + template + void select(P& predicate) { + index_.reserve(values_.size()); + for (size_t i = 0; i < values_.size(); ++i) { + if (predicate(values_[i])) { + index_.push_back(i); + } + } + index_.shrink_to_fit(); + } + + void all() { + index_.resize(values_.size()); + std::iota(index_.begin(), index_.end(), 0); + } + + bool empty() const { return index_.empty(); } + size_t size() const { return index_.size(); } + + value_type const& operator[](size_t i) const { + return proxy(this, index_.begin() + i); + } + + iterator begin() { return iterator(this, index_.begin()); } + + iterator end() { return iterator(this, index_.end()); } + + std::vector& index() { return index_; } + std::vector const& index() const { return index_; } + + std::span raw() const { return values_; } + + private: + std::vector index_; + std::span const values_; +}; + +} // namespace dwarfs diff --git a/src/dwarfs/inode_manager.cpp b/src/dwarfs/inode_manager.cpp index 582ab7ac..5df2045d 100644 --- a/src/dwarfs/inode_manager.cpp +++ b/src/dwarfs/inode_manager.cpp @@ -43,8 +43,8 @@ #include "dwarfs/compiler.h" #include "dwarfs/entry.h" #include "dwarfs/error.h" -#include "dwarfs/inode.h" #include "dwarfs/inode_manager.h" +#include "dwarfs/inode_ordering.h" #include "dwarfs/logger.h" #include "dwarfs/mmif.h" #include "dwarfs/nilsimsa.h" @@ -526,14 +526,11 @@ class inode_manager_ final : public inode_manager::impl { void for_each_inode_in_order( std::function const&)> const& fn) const override { - std::vector index; - index.resize(inodes_.size()); - std::iota(index.begin(), index.end(), size_t(0)); - std::sort(index.begin(), index.end(), [this](size_t a, size_t b) { - return inodes_[a]->num() < inodes_[b]->num(); - }); - for (auto i : index) { - fn(inodes_[i]); + auto span = sortable_span(); + span.all(); + inode_ordering(LOG_GET_LOGGER).by_inode_number(span); + for (auto const& i : span) { + fn(i); } } @@ -585,6 +582,10 @@ class inode_manager_ final : public inode_manager::impl { void dump(std::ostream& os) const override; + sortable_inode_span sortable_span() const override { + return sortable_inode_span(inodes_); + } + private: static bool inodes_need_scanning(inode_options const& opts) { if (opts.categorizer_mgr) { @@ -599,27 +600,11 @@ class inode_manager_ final : public inode_manager::impl { } void order_inodes_by_path() { - std::vector paths; - std::vector index(inodes_.size()); - - paths.reserve(inodes_.size()); - - for (auto const& ino : inodes_) { - paths.emplace_back(ino->any()->path_as_string()); - } - - std::iota(index.begin(), index.end(), size_t(0)); - - std::sort(index.begin(), index.end(), - [&](size_t a, size_t b) { return paths[a] < paths[b]; }); - - std::vector> tmp; - tmp.reserve(inodes_.size()); - - for (size_t ix : index) { - tmp.emplace_back(inodes_[ix]); - } + auto span = sortable_span(); + span.all(); + inode_ordering(LOG_GET_LOGGER).by_path(span); + std::vector> tmp(span.begin(), span.end()); inodes_.swap(tmp); } diff --git a/src/dwarfs/inode_ordering.cpp b/src/dwarfs/inode_ordering.cpp new file mode 100644 index 00000000..2cfde05a --- /dev/null +++ b/src/dwarfs/inode_ordering.cpp @@ -0,0 +1,72 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include + +#include "dwarfs/entry.h" +#include "dwarfs/inode_ordering.h" +#include "dwarfs/logger.h" + +namespace dwarfs { + +template +class inode_ordering_ final : public inode_ordering::impl { + public: + inode_ordering_(logger& lgr) + : LOG_PROXY_INIT(lgr) {} + + void by_inode_number(sortable_inode_span& sp) const override; + void by_path(sortable_inode_span& sp) const override; + + private: + LOG_PROXY_DECL(LoggerPolicy); +}; + +template +void inode_ordering_::by_inode_number( + sortable_inode_span& sp) const { + std::sort( + sp.index().begin(), sp.index().end(), + [r = sp.raw()](auto a, auto b) { return r[a]->num() < r[b]->num(); }); +} + +template +void inode_ordering_::by_path(sortable_inode_span& sp) const { + std::vector paths; + + auto raw = sp.raw(); + auto& index = sp.index(); + + paths.resize(raw.size()); + + for (auto i : index) { + paths[i] = raw[i]->any()->path_as_string(); + } + + std::sort(index.begin(), index.end(), + [&](auto a, auto b) { return paths[a] < paths[b]; }); +} + +inode_ordering::inode_ordering(logger& lgr) + : impl_(make_unique_logging_object( + lgr)) {} + +} // namespace dwarfs