Add --file-hash option (fixes github #92)

This does not yet address the issue that uniquely sized files are
unnecessarily hashed, which is also mentioned in #92. This will be
addressed separately.
This commit is contained in:
Marcus Holland-Moritz 2022-10-27 09:47:52 +02:00
parent 482a40560e
commit 148de5bf0d
11 changed files with 246 additions and 165 deletions

View File

@ -276,6 +276,10 @@ Most other options are concerned with compression tuning:
Don't add a creation timestamp. This is useful when bit-identical file
system images are required to be produced from the same input.
- `--file-hash=none`|*name*:
Select the hashing function to be used for file deduplication. If `none`
is chosen, file deduplication is disabled.
- `--log-level=`*name*:
Specify a logging level.

View File

@ -38,27 +38,14 @@ class checksum {
XXH3_128,
};
// Returns the digest size in bytes for the given checksum algorithm.
// constexpr so it can be used in static_assert (see section_header_v2).
// Every known enumerator returns before the trailing check; reaching
// DWARFS_CHECK means an unhandled algorithm value and aborts.
static constexpr size_t digest_size(algorithm alg) {
switch (alg) {
case algorithm::SHA1:
return 20;  // SHA-1 produces a 160-bit digest
case algorithm::SHA2_512_256:
return 32;  // SHA-512/256 produces a 256-bit digest
case algorithm::XXH3_64:
return 8;
case algorithm::XXH3_128:
return 16;
}
DWARFS_CHECK(false, "unknown algorithm");
}
static bool is_available(std::string const& algo);
static std::vector<std::string> available_algorithms();
static bool
compute(algorithm alg, void const* data, size_t size, void* digest);
static bool
verify(algorithm alg, void const* data, size_t size, void const* digest);
static bool verify(algorithm alg, void const* data, size_t size,
void const* digest, size_t digest_size);
checksum(algorithm alg);
checksum(std::string const& alg);
checksum& update(void const* data, size_t size) {
impl_->update(data, size);
@ -69,7 +56,7 @@ class checksum {
bool verify(void const* digest) const;
algorithm type() const { return alg_; }
size_t digest_size() const { return impl_->digest_size(); }
class impl {
public:
@ -77,11 +64,11 @@ class checksum {
virtual void update(void const* data, size_t size) = 0;
virtual bool finalize(void* digest) = 0;
virtual size_t digest_size() = 0;
};
private:
std::unique_ptr<impl> impl_;
algorithm const alg_;
};
} // namespace dwarfs

View File

@ -21,7 +21,6 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
@ -33,6 +32,8 @@
#include <sys/stat.h>
#include <folly/small_vector.h>
#include "dwarfs/entry_interface.h"
namespace dwarfs {
@ -126,7 +127,8 @@ class file : public entry {
std::shared_ptr<inode> get_inode() const;
void accept(entry_visitor& v, bool preorder) override;
void scan(os_access& os, progress& prog) override;
void scan(std::shared_ptr<mmif> const& mm, progress& prog);
void scan(std::shared_ptr<mmif> const& mm, progress& prog,
std::optional<std::string> const& hash_alg);
void create_data();
void hardlink(file* other, progress& prog);
uint32_t unique_file_id() const;
@ -138,8 +140,8 @@ class file : public entry {
private:
struct data {
using hash_type = std::array<char, 16>;
hash_type hash{0};
using hash_type = folly::small_vector<char, 16>;
hash_type hash;
uint32_t refcount{1};
std::optional<uint32_t> inode_num;
};

View File

@ -108,11 +108,6 @@ struct section_header_v2 {
uint16_t compression; // [54] compression
uint64_t length; // [56] length of section
static_assert(checksum::digest_size(checksum::algorithm::XXH3_64) ==
sizeof(xxh3_64));
static_assert(checksum::digest_size(checksum::algorithm::SHA2_512_256) ==
sizeof(sha2_512_256));
std::string to_string() const;
void dump(std::ostream& os) const;
};

View File

@ -87,6 +87,7 @@ struct file_order_options {
struct scanner_options {
file_order_options file_order;
std::optional<std::string> file_hash_algorithm{"xxh3-128"};
std::optional<uint16_t> uid;
std::optional<uint16_t> gid;
std::optional<uint64_t> timestamp;

View File

@ -19,8 +19,11 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <unordered_set>
#include <openssl/evp.h>
@ -35,46 +38,29 @@ namespace dwarfs {
namespace {
bool compute_evp(const EVP_MD* algorithm, void const* data, size_t size,
void* digest, unsigned int* digest_size) {
return EVP_Digest(data, size, reinterpret_cast<unsigned char*>(digest),
digest_size, algorithm, nullptr);
}
// One-shot XXH3-64 hash of [data, data + size); writes the 8-byte
// result into `digest`. Always succeeds.
bool compute_xxh3_64(void const* data, size_t size, void* digest) {
  auto const value = XXH3_64bits(data, size);
  static_assert(checksum::digest_size(checksum::algorithm::XXH3_64) ==
                sizeof(value));
  ::memcpy(digest, &value, sizeof(value));
  return true;
}
// One-shot XXH3-128 hash of [data, data + size); writes the 16-byte
// result into `digest`. Always succeeds.
bool compute_xxh3_128(void const* data, size_t size, void* digest) {
  auto const value = XXH3_128bits(data, size);
  static_assert(checksum::digest_size(checksum::algorithm::XXH3_128) ==
                sizeof(value));
  ::memcpy(digest, &value, sizeof(value));
  return true;
}
// Hash algorithms implemented natively (via xxHash) rather than through
// OpenSSL's EVP interface; any name not in this set is resolved via
// checksum_evp::is_available(). Declared const: it is only ever queried
// (lookup), never mutated, so make that invariant compiler-enforced.
std::unordered_set<std::string> const supported_algorithms{
    "xxh3-64",
    "xxh3-128",
};
class checksum_evp : public checksum::impl {
public:
checksum_evp(EVP_MD const* evp, checksum::algorithm alg)
: context_(EVP_MD_CTX_new())
, dig_size_(checksum::digest_size(alg)) {
EVP_DigestInit_ex(context_, evp, nullptr);
checksum_evp(::EVP_MD const* evp)
: context_(::EVP_MD_CTX_new())
, dig_size_(::EVP_MD_size(evp)) {
::EVP_DigestInit_ex(context_, evp, nullptr);
}
~checksum_evp() override { EVP_MD_CTX_destroy(context_); }
~checksum_evp() override { ::EVP_MD_CTX_destroy(context_); }
void update(void const* data, size_t size) override {
DWARFS_CHECK(EVP_DigestUpdate(context_, data, size),
DWARFS_CHECK(::EVP_DigestUpdate(context_, data, size),
"EVP_DigestUpdate() failed");
}
bool finalize(void* digest) override {
unsigned int dig_size = 0;
bool rv = EVP_DigestFinal_ex(
bool rv = ::EVP_DigestFinal_ex(
context_, reinterpret_cast<unsigned char*>(digest), &dig_size);
if (rv) {
@ -86,8 +72,27 @@ class checksum_evp : public checksum::impl {
return rv;
}
// Enumerate all message-digest algorithms known to OpenSSL's EVP layer.
// EVP_MD_do_all() visits every registered digest; entries with a
// non-null `to` are aliases and are skipped so each algorithm appears
// once, under its canonical name. The capture-less lambda converts to
// the plain function pointer EVP_MD_do_all expects; `vec` is the
// user-data pointer round-tripped through void*, so static_cast (not
// reinterpret_cast) is the correct, minimal cast back to its real type.
static std::vector<std::string> available_algorithms() {
  std::vector<std::string> available;
  ::EVP_MD_do_all(
      [](const ::EVP_MD*, const char* from, const char* to, void* vec) {
        if (!to) {
          static_cast<std::vector<std::string>*>(vec)->emplace_back(from);
        }
      },
      &available);
  return available;
}
// An EVP algorithm is available iff OpenSSL can resolve its name
// (EVP_get_digestbyname returns non-null for known digests).
static bool is_available(std::string const& algo) {
return ::EVP_get_digestbyname(algo.c_str()) != nullptr;
}
size_t digest_size() override { return dig_size_; }
private:
EVP_MD_CTX* context_;
::EVP_MD_CTX* context_;
size_t const dig_size_;
};
@ -113,6 +118,10 @@ class checksum_xxh3_64 : public checksum::impl {
return true;
}
size_t digest_size() override {
return sizeof(decltype(std::function{XXH3_64bits_digest})::result_type);
}
private:
XXH3_state_t* state_;
};
@ -139,57 +148,47 @@ class checksum_xxh3_128 : public checksum::impl {
return true;
}
size_t digest_size() override {
return sizeof(decltype(std::function{XXH3_128bits_digest})::result_type);
}
private:
XXH3_state_t* state_;
};
} // namespace
bool checksum::compute(algorithm alg, void const* data, size_t size,
void* digest) {
bool rv = false;
unsigned int dig_size = 0;
// True if `algo` names a usable hash function: either one of the
// natively supported xxHash variants or a digest known to OpenSSL.
bool checksum::is_available(std::string const& algo) {
return supported_algorithms.count(algo) or checksum_evp::is_available(algo);
}
switch (alg) {
case algorithm::SHA1:
rv = compute_evp(EVP_sha1(), data, size, digest, &dig_size);
break;
case algorithm::SHA2_512_256:
rv = compute_evp(EVP_sha512_256(), data, size, digest, &dig_size);
break;
case algorithm::XXH3_64:
rv = compute_xxh3_64(data, size, digest);
break;
case algorithm::XXH3_128:
rv = compute_xxh3_128(data, size, digest);
break;
}
if (rv && dig_size > 0) {
DWARFS_CHECK(digest_size(alg) == dig_size,
fmt::format("digest size mismatch: {0} != {1} [{2}]",
digest_size(alg), dig_size,
static_cast<int>(alg)));
}
return rv;
// Returns the union of the native xxHash algorithm names and the
// digests registered with OpenSSL, sorted alphabetically for stable
// presentation (e.g. in --help output). The two sources do not
// overlap, so no deduplication is needed.
std::vector<std::string> checksum::available_algorithms() {
auto available_evp = checksum_evp::available_algorithms();
std::vector<std::string> available;
available.insert(available.end(), supported_algorithms.begin(),
supported_algorithms.end());
available.insert(available.end(), available_evp.begin(), available_evp.end());
std::sort(available.begin(), available.end());
return available;
}
bool checksum::verify(algorithm alg, void const* data, size_t size,
const void* digest) {
const void* digest, size_t digest_size) {
std::array<char, EVP_MAX_MD_SIZE> tmp;
return compute(alg, data, size, tmp.data()) &&
::memcmp(digest, tmp.data(), digest_size(alg)) == 0;
checksum cs(alg);
DWARFS_CHECK(digest_size == cs.digest_size(), "digest size mismatch");
cs.update(data, size);
return cs.finalize(tmp.data()) &&
::memcmp(digest, tmp.data(), digest_size) == 0;
}
checksum::checksum(algorithm alg)
: alg_(alg) {
checksum::checksum(algorithm alg) {
switch (alg) {
case algorithm::SHA1:
impl_ = std::make_unique<checksum_evp>(EVP_sha1(), alg);
impl_ = std::make_unique<checksum_evp>(::EVP_sha1());
break;
case algorithm::SHA2_512_256:
impl_ = std::make_unique<checksum_evp>(EVP_sha512_256(), alg);
impl_ = std::make_unique<checksum_evp>(::EVP_sha512_256());
break;
case algorithm::XXH3_64:
impl_ = std::make_unique<checksum_xxh3_64>();
@ -203,10 +202,22 @@ checksum::checksum(algorithm alg)
}
}
// Construct a checksum engine from an algorithm name. The xxh3 variants
// are checked first and dispatched to the native xxHash implementations;
// any other name is resolved through OpenSSL's EVP lookup. Aborts on an
// unknown name, so callers should validate via checksum::is_available()
// beforehand (as mkdwarfs does for --file-hash).
checksum::checksum(std::string const& alg) {
if (alg == "xxh3-64") {
impl_ = std::make_unique<checksum_xxh3_64>();
} else if (alg == "xxh3-128") {
impl_ = std::make_unique<checksum_xxh3_128>();
} else if (auto md = ::EVP_get_digestbyname(alg.c_str())) {
impl_ = std::make_unique<checksum_evp>(md);
} else {
DWARFS_CHECK(false, "unknown algorithm");
}
}
bool checksum::verify(void const* digest) const {
std::array<char, EVP_MAX_MD_SIZE> tmp;
return impl_->finalize(tmp.data()) &&
::memcmp(digest, tmp.data(), digest_size(alg_)) == 0;
::memcmp(digest, tmp.data(), impl_->digest_size()) == 0;
}
} // namespace dwarfs

View File

@ -142,7 +142,7 @@ void entry::set_ctime(uint64_t ctime) { stat_.st_atime = ctime; }
std::string_view file::hash() const {
auto& h = data_->hash;
return std::string_view(&h[0], h.size());
return std::string_view(h.data(), h.size());
}
void file::set_inode(std::shared_ptr<inode> ino) {
@ -164,32 +164,35 @@ void file::scan(os_access& os, progress& prog) {
mm = os.map_file(path(), s);
}
scan(mm, prog);
scan(mm, prog, "xxh3-128");
}
void file::scan(std::shared_ptr<mmif> const& mm, progress& prog) {
constexpr auto alg = checksum::algorithm::XXH3_128;
static_assert(checksum::digest_size(alg) == sizeof(data::hash_type));
void file::scan(std::shared_ptr<mmif> const& mm, progress& prog,
std::optional<std::string> const& hash_alg) {
if (hash_alg) {
checksum cs(*hash_alg);
if (size_t s = size(); s > 0) {
constexpr size_t chunk_size = 32 << 20;
prog.original_size += s;
checksum cs(alg);
size_t offset = 0;
if (size_t s = size(); s > 0) {
constexpr size_t chunk_size = 32 << 20;
prog.original_size += s;
size_t offset = 0;
while (s >= chunk_size) {
cs.update(mm->as<void>(offset), chunk_size);
mm->release_until(offset);
offset += chunk_size;
s -= chunk_size;
while (s >= chunk_size) {
cs.update(mm->as<void>(offset), chunk_size);
mm->release_until(offset);
offset += chunk_size;
s -= chunk_size;
}
cs.update(mm->as<void>(offset), s);
}
cs.update(mm->as<void>(offset), s);
data_->hash.resize(cs.digest_size());
DWARFS_CHECK(cs.finalize(&data_->hash[0]), "checksum computation failed");
} else {
DWARFS_CHECK(checksum::compute(alg, nullptr, 0, &data_->hash[0]),
DWARFS_CHECK(cs.finalize(data_->hash.data()),
"checksum computation failed");
} else {
prog.original_size += size();
}
}

View File

@ -122,9 +122,9 @@ class fs_section_v2 : public fs_section::impl {
bool check_fast(mmif& mm) const override {
auto hdr_cs_len =
sizeof(section_header_v2) - offsetof(section_header_v2, number);
return checksum::verify(checksum::algorithm::XXH3_64,
mm.as<void>(start_ - hdr_cs_len),
hdr_.length + hdr_cs_len, &hdr_.xxh3_64);
return checksum::verify(
checksum::algorithm::XXH3_64, mm.as<void>(start_ - hdr_cs_len),
hdr_.length + hdr_cs_len, &hdr_.xxh3_64, sizeof(hdr_.xxh3_64));
}
bool verify(mmif& mm) const override {
@ -132,7 +132,8 @@ class fs_section_v2 : public fs_section::impl {
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64);
return checksum::verify(checksum::algorithm::SHA2_512_256,
mm.as<void>(start_ - hdr_sha_len),
hdr_.length + hdr_sha_len, &hdr_.sha2_512_256);
hdr_.length + hdr_sha_len, &hdr_.sha2_512_256,
sizeof(hdr_.sha2_512_256));
}
folly::ByteRange data(mmif& mm) const override {

View File

@ -77,22 +77,23 @@ class visitor_base : public entry_visitor {
class file_scanner {
public:
file_scanner(worker_group& wg, os_access& os, inode_manager& im,
inode_options const& ino_opts, progress& prog)
inode_options const& ino_opts,
std::optional<std::string> const& hash_algo, progress& prog)
: wg_(wg)
, os_(os)
, im_(im)
, ino_opts_(ino_opts)
, hash_algo_{hash_algo}
, prog_(prog) {}
void scan(file* p) {
if (p->num_hard_links() > 1) {
auto ino = p->raw_inode_num();
auto [it, is_new] = hardlink_cache_.emplace(ino, p);
auto& vec = hardlinks_[p->raw_inode_num()];
vec.push_back(p);
if (!is_new) {
p->hardlink(it->second, prog_);
if (vec.size() > 1) {
p->hardlink(vec[0], prog_);
++prog_.files_scanned;
hardlinked_.push_back(p);
return;
}
}
@ -108,20 +109,26 @@ class file_scanner {
}
prog_.current.store(p);
p->scan(mm, prog_);
p->scan(mm, prog_, hash_algo_);
++prog_.files_scanned;
std::shared_ptr<inode> inode;
{
std::lock_guard lock(mx_);
auto& ref = hash_[p->hash()];
if (ref.empty()) {
if (hash_algo_) {
auto& ref = hash_[p->hash()];
if (ref.empty()) {
inode = im_.create_inode();
p->set_inode(inode);
} else {
p->set_inode(ref.front()->get_inode());
}
ref.push_back(p);
} else {
files_[p->raw_inode_num()].push_back(p);
inode = im_.create_inode();
p->set_inode(inode);
} else {
p->set_inode(ref.front()->get_inode());
}
ref.push_back(p);
}
if (inode) {
@ -139,26 +146,50 @@ class file_scanner {
}
void finalize(uint32_t& inode_num) {
hardlink_cache_.clear();
if (hash_algo_) {
finalize_hardlinks(hash_, [](file const* p) { return p->hash(); });
finalize_files(hash_, inode_num);
} else {
finalize_hardlinks(files_,
[](file const* p) { return p->raw_inode_num(); });
finalize_files(files_, inode_num);
}
}
for (auto p : hardlinked_) {
auto& fv = hash_[p->hash()];
p->set_inode(fv.front()->get_inode());
fv.push_back(p);
uint32_t num_unique() const { return num_unique_; }
private:
template <typename KeyType, typename Lookup>
void finalize_hardlinks(folly::F14FastMap<KeyType, inode::files_vector>& fmap,
Lookup&& lookup) {
for (auto& kv : hardlinks_) {
auto& hlv = kv.second;
if (hlv.size() > 1) {
auto& fv = fmap[lookup(hlv.front())];
// TODO: for (auto p : hlv | std::views::drop(1)) {
std::for_each(hlv.begin() + 1, hlv.end(), [&fv](auto p) {
p->set_inode(fv.front()->get_inode());
fv.push_back(p);
});
}
}
hardlinked_.clear();
hardlinks_.clear();
}
std::vector<std::pair<std::string_view, inode::files_vector>> ent;
ent.reserve(hash_.size());
hash_.eraseInto(hash_.begin(), hash_.end(),
[&ent](std::string_view&& h, inode::files_vector&& fv) {
ent.emplace_back(std::move(h), std::move(fv));
});
template <typename KeyType>
void finalize_files(folly::F14FastMap<KeyType, inode::files_vector>& fmap,
uint32_t& inode_num) {
std::vector<std::pair<KeyType, inode::files_vector>> ent;
ent.reserve(fmap.size());
fmap.eraseInto(fmap.begin(), fmap.end(),
[&ent](KeyType&& k, inode::files_vector&& fv) {
ent.emplace_back(std::move(k), std::move(fv));
});
std::sort(ent.begin(), ent.end(),
[](auto& left, auto& right) { return left.first < right.first; });
DWARFS_CHECK(hash_.empty(), "expected hash to be empty");
DWARFS_CHECK(fmap.empty(), "expected file map to be empty");
uint32_t obj_num = 0;
@ -166,13 +197,10 @@ class file_scanner {
finalize_inodes<false>(ent, inode_num, obj_num);
}
uint32_t num_unique() const { return num_unique_; }
private:
template <bool Unique>
void finalize_inodes(
std::vector<std::pair<std::string_view, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num) {
template <bool Unique, typename KeyType>
void
finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num) {
for (auto& p : ent) {
auto& files = p.second;
@ -217,12 +245,13 @@ class file_scanner {
os_access& os_;
inode_manager& im_;
inode_options const& ino_opts_;
std::optional<std::string> const hash_algo_;
progress& prog_;
uint32_t num_unique_{0};
std::vector<file*> hardlinked_;
folly::F14FastMap<uint64_t, file*> hardlink_cache_;
folly::F14FastMap<uint64_t, inode::files_vector> hardlinks_;
std::mutex mx_;
folly::F14FastMap<std::string_view, inode::files_vector> hash_;
folly::F14FastMap<uint64_t, inode::files_vector> files_;
};
class dir_set_inode_visitor : public visitor_base {
@ -600,7 +629,8 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
prog.set_status_function(status_string);
inode_manager im(lgr_, prog);
file_scanner fs(wg_, *os_, im, options_.inode, prog);
file_scanner fs(wg_, *os_, im, options_.inode, options_.file_hash_algorithm,
prog);
auto root = scan_tree(path, prog, fs);

View File

@ -100,13 +100,15 @@ const std::map<std::string, file_order_mode> order_choices{
{"script", file_order_mode::SCRIPT},
#endif
{"similarity", file_order_mode::SIMILARITY},
{"nilsimsa", file_order_mode::NILSIMSA}};
{"nilsimsa", file_order_mode::NILSIMSA},
};
const std::map<std::string, console_writer::progress_mode> progress_modes{
{"none", console_writer::NONE},
{"simple", console_writer::SIMPLE},
{"ascii", console_writer::ASCII},
{"unicode", console_writer::UNICODE}};
{"unicode", console_writer::UNICODE},
};
const std::map<std::string, uint32_t> time_resolutions{
{"sec", 1},
@ -336,7 +338,8 @@ int mkdwarfs(int argc, char** argv) {
block_manager::config cfg;
std::string path, output, memory_limit, script_arg, compression, header,
schema_compression, metadata_compression, log_level_str, timestamp,
time_resolution, order, progress_mode, recompress_opts, pack_metadata;
time_resolution, order, progress_mode, recompress_opts, pack_metadata,
file_hash_algo;
size_t num_workers;
bool no_progress = false, remove_header = false, no_section_index = false,
force_overwrite = false;
@ -355,6 +358,11 @@ int mkdwarfs(int argc, char** argv) {
(from(time_resolutions) | get<0>() | unsplit(", ")) +
")";
auto hash_list = checksum::available_algorithms();
auto file_hash_desc = "choice of file hashing function (none, " +
(from(hash_list) | unsplit(", ")) + ")";
// clang-format off
po::options_description opts("Command line options");
opts.add_options()
@ -453,6 +461,9 @@ int mkdwarfs(int argc, char** argv) {
("no-create-timestamp",
po::value<bool>(&options.no_create_timestamp)->zero_tokens(),
"don't add create timestamp to file system")
("file-hash",
po::value<std::string>(&file_hash_algo)->default_value("xxh3-128"),
file_hash_desc.c_str())
("log-level",
po::value<std::string>(&log_level_str)->default_value("info"),
"log level (error, warn, info, debug, trace)")
@ -657,6 +668,16 @@ int mkdwarfs(int argc, char** argv) {
return 1;
}
if (file_hash_algo == "none") {
options.file_hash_algorithm.reset();
} else if (checksum::is_available(file_hash_algo)) {
options.file_hash_algorithm = file_hash_algo;
} else {
std::cerr << "error: unknown file hash function '" << file_hash_algo
<< "'\n";
return 1;
}
size_t mem_limit = parse_size_with_unit(memory_limit);
worker_group wg_compress("compress", num_workers);

View File

@ -50,6 +50,8 @@ using namespace dwarfs;
namespace {
std::string const default_file_hash_algo{"xxh3-128"};
std::string
build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::string const& compression,
@ -80,7 +82,8 @@ void basic_end_to_end_test(std::string const& compressor,
bool pack_directories, bool pack_shared_files_table,
bool pack_names, bool pack_names_index,
bool pack_symlinks, bool pack_symlinks_index,
bool plain_names_table, bool plain_symlinks_table) {
bool plain_names_table, bool plain_symlinks_table,
std::optional<std::string> file_hash_algo) {
block_manager::config cfg;
scanner_options options;
@ -88,6 +91,7 @@ void basic_end_to_end_test(std::string const& compressor,
cfg.block_size_bits = block_size_bits;
options.file_order.mode = file_order;
options.file_hash_algorithm = file_hash_algo;
options.with_devices = with_devices;
options.with_specials = with_specials;
options.inode.with_similarity = file_order == file_order_mode::SIMILARITY;
@ -397,8 +401,10 @@ class compression_test
std::tuple<std::string, unsigned, file_order_mode>> {};
class scanner_test : public testing::TestWithParam<
std::tuple<bool, bool, bool, bool, bool, bool, bool>> {
};
std::tuple<bool, bool, bool, bool, bool, bool, bool,
std::optional<std::string>>> {};
class hashing_test : public testing::TestWithParam<std::string> {};
class packing_test : public testing::TestWithParam<
std::tuple<bool, bool, bool, bool, bool, bool, bool>> {
@ -417,17 +423,24 @@ TEST_P(compression_test, end_to_end) {
basic_end_to_end_test(compressor, block_size_bits, file_order, true, true,
false, false, false, false, false, true, true, true,
true, true, true, true, false, false);
true, true, true, true, false, false,
default_file_hash_algo);
}
TEST_P(scanner_test, end_to_end) {
auto [with_devices, with_specials, set_uid, set_gid, set_time, keep_all_times,
enable_nlink] = GetParam();
enable_nlink, file_hash_algo] = GetParam();
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE,
with_devices, with_specials, set_uid, set_gid, set_time,
keep_all_times, enable_nlink, true, true, true, true,
true, true, true, false, false);
true, true, true, false, false, file_hash_algo);
}
TEST_P(hashing_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
true, true, true, true, true, true, true, true, true,
true, true, true, false, false, GetParam());
}
TEST_P(packing_test, end_to_end) {
@ -438,7 +451,7 @@ TEST_P(packing_test, end_to_end) {
false, false, false, false, false, pack_chunk_table,
pack_directories, pack_shared_files_table, pack_names,
pack_names_index, pack_symlinks, pack_symlinks_index,
false, false);
false, false, default_file_hash_algo);
}
TEST_P(plain_tables_test, end_to_end) {
@ -447,7 +460,7 @@ TEST_P(plain_tables_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
false, false, false, false, false, false, false, false,
false, false, false, false, plain_names_table,
plain_symlinks_table);
plain_symlinks_table, default_file_hash_algo);
}
TEST_P(packing_test, regression_empty_fs) {
@ -516,7 +529,11 @@ INSTANTIATE_TEST_SUITE_P(
dwarfs, scanner_test,
::testing::Combine(::testing::Bool(), ::testing::Bool(), ::testing::Bool(),
::testing::Bool(), ::testing::Bool(), ::testing::Bool(),
::testing::Bool()));
::testing::Bool(),
::testing::Values(std::nullopt, "xxh3-128", "sha512")));
INSTANTIATE_TEST_SUITE_P(dwarfs, hashing_test,
::testing::ValuesIn(checksum::available_algorithms()));
INSTANTIATE_TEST_SUITE_P(
dwarfs, packing_test,
@ -661,7 +678,12 @@ TEST_P(compression_regression, github45) {
INSTANTIATE_TEST_SUITE_P(dwarfs, compression_regression,
::testing::ValuesIn(compressions));
TEST(scanner, inode_ordering) {
class file_scanner : public testing::TestWithParam<std::optional<std::string>> {
};
TEST_P(file_scanner, inode_ordering) {
auto file_hash_algo = GetParam();
std::ostringstream logss;
stream_logger lgr(logss); // TODO: mock
lgr.set_policy<prod_logger_policy>();
@ -670,9 +692,10 @@ TEST(scanner, inode_ordering) {
auto opts = scanner_options();
opts.file_order.mode = file_order_mode::PATH;
opts.file_hash_algorithm = file_hash_algo;
auto input = std::make_shared<test::os_access_mock>();
constexpr int dim = 15;
constexpr int dim = 14;
input->add_dir("");
@ -693,3 +716,6 @@ TEST(scanner, inode_ordering) {
EXPECT_EQ(ref, build_dwarfs(lgr, input, "null", bmcfg, opts));
}
}
INSTANTIATE_TEST_SUITE_P(dwarfs, file_scanner,
::testing::Values(std::nullopt, "xxh3-128"));