Produce deterministic inode numbers (needed to fix #91)

While most of the code ensures that elements are kept in a
deterministic order, the code that assigned inode numbers was
iterating a hash table. The table by itself guarantees FIFO
(insertion-order) iteration, but items were inserted from multiple
threads when scanning the input file system, so the insertion
order, and with it the assigned inode numbers, could differ
between runs.

This change moves the entries out of the hash table into a vector
and sorts them by key before assigning inode numbers.
(This shouldn't be much of a performance hit.)
This commit is contained in:
Marcus Holland-Moritz 2022-08-03 18:54:57 +02:00
parent 6bbd4e3970
commit 422146d7a2

View File

@ -150,20 +150,31 @@ class file_scanner {
hardlinked_.clear(); hardlinked_.clear();
std::vector<std::pair<std::string_view, inode::files_vector>> ent;
ent.reserve(hash_.size());
hash_.eraseInto(hash_.begin(), hash_.end(),
[&ent](std::string_view&& h, inode::files_vector&& fv) {
ent.emplace_back(std::move(h), std::move(fv));
});
std::sort(ent.begin(), ent.end(),
[](auto& left, auto& right) { return left.first < right.first; });
DWARFS_CHECK(hash_.empty(), "expected hash to be empty");
uint32_t obj_num = 0; uint32_t obj_num = 0;
finalize_inodes<true>(inode_num, obj_num); finalize_inodes<true>(ent, inode_num, obj_num);
finalize_inodes<false>(inode_num, obj_num); finalize_inodes<false>(ent, inode_num, obj_num);
hash_.clear();
} }
uint32_t num_unique() const { return num_unique_; } uint32_t num_unique() const { return num_unique_; }
private: private:
template <bool Unique> template <bool Unique>
void finalize_inodes(uint32_t& inode_num, uint32_t& obj_num) { void finalize_inodes(
for (auto& p : hash_) { std::vector<std::pair<std::string_view, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num) {
for (auto& p : ent) {
auto& files = p.second; auto& files = p.second;
if constexpr (Unique) { if constexpr (Unique) {