From 422146d7a28389fbf88a0dbf06030a246977b13d Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 3 Aug 2022 18:54:57 +0200 Subject: [PATCH] Produce deterministic inode numbers (needed to fix #91) While most of the code typically ensures that elements are kept in a deterministic order, the code that assigned inode numbers was iterating a hash table, which by itself guaranteed FIFO semantics, but items were inserted from multiple threads when scanning the input file system. This change adds a sorting step before assigning inode numbers. (This shouldn't be much of a performance hit.) --- src/dwarfs/scanner.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 9e14528b..49df0065 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -150,20 +150,31 @@ class file_scanner { hardlinked_.clear(); + std::vector<std::pair<std::string_view, inode::files_vector>> ent; + ent.reserve(hash_.size()); + hash_.eraseInto(hash_.begin(), hash_.end(), + [&ent](std::string_view&& h, inode::files_vector&& fv) { + ent.emplace_back(std::move(h), std::move(fv)); + }); + std::sort(ent.begin(), ent.end(), + [](auto& left, auto& right) { return left.first < right.first; }); + + DWARFS_CHECK(hash_.empty(), "expected hash to be empty"); + uint32_t obj_num = 0; - finalize_inodes<true>(inode_num, obj_num); - finalize_inodes<false>(inode_num, obj_num); - - hash_.clear(); + finalize_inodes<true>(ent, inode_num, obj_num); + finalize_inodes<false>(ent, inode_num, obj_num); } uint32_t num_unique() const { return num_unique_; } private: template <bool Unique> - void finalize_inodes(uint32_t& inode_num, uint32_t& obj_num) { - for (auto& p : hash_) { + void finalize_inodes( + std::vector<std::pair<std::string_view, inode::files_vector>>& ent, + uint32_t& inode_num, uint32_t& obj_num) { + for (auto& p : ent) { auto& files = p.second; if constexpr (Unique) {