Add support for nilsimsa-based inode ordering

Also refactor inode manager to take full control of ordering.
Nilsimsa ordering is really slow, but has the advantage that
inodes are added continuously while ordering. This allows us
to perform the bulk of the ordering in the background while
already running segmenting/compression.
This commit is contained in:
Marcus Holland-Moritz 2020-12-07 22:16:31 +01:00
parent 4f22043279
commit 63c9e9a3c3
10 changed files with 393 additions and 158 deletions

View File

@ -109,9 +109,8 @@ class entry : public entry_interface {
class file : public entry {
public:
file(const std::string& name, std::shared_ptr<entry> parent,
const struct ::stat& st, bool with_similarity)
: entry(name, parent, st)
, with_similarity_(with_similarity) {}
const struct ::stat& st)
: entry(name, parent, st) {}
type_t type() const override;
std::string_view hash() const;
@ -119,14 +118,11 @@ class file : public entry {
std::shared_ptr<inode> get_inode() const;
void accept(entry_visitor& v, bool preorder) override;
uint32_t inode_num() const override;
uint32_t similarity_hash() const { return similarity_hash_; }
void scan(os_access& os, progress& prog) override;
private:
using hash_type = std::array<char, 20>;
uint32_t similarity_hash_{0};
const bool with_similarity_;
hash_type hash_{0};
std::shared_ptr<inode> inode_;
};
@ -195,7 +191,7 @@ class device : public entry {
class entry_factory {
public:
static std::unique_ptr<entry_factory> create(bool with_similarity = false);
static std::unique_ptr<entry_factory> create();
virtual ~entry_factory() = default;

View File

@ -35,15 +35,20 @@ struct chunk;
}
class file;
class os_access;
struct inode_options;
class inode : public object {
public:
using files_vector = folly::small_vector<file*, 1>;
virtual void set_files(files_vector&& fv) = 0;
virtual void scan(os_access& os, inode_options const& options) = 0;
virtual void set_num(uint32_t num) = 0;
virtual uint32_t num() const = 0;
virtual uint32_t similarity_hash() const = 0;
virtual std::vector<uint64_t> const& nilsimsa_similarity_hash() const = 0;
virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& files() const = 0;

View File

@ -25,23 +25,45 @@
#include <functional>
#include <memory>
#include "dwarfs/options.h"
namespace dwarfs {
class inode;
class logger;
class script;
class inode_manager {
public:
static std::unique_ptr<inode_manager> create();
using inode_cb = std::function<void(std::shared_ptr<inode> const&)>;
virtual ~inode_manager() = default;
virtual std::shared_ptr<inode> create_inode() = 0;
virtual size_t count() const = 0;
virtual void order_inodes() = 0;
virtual void order_inodes(std::shared_ptr<script> scr) = 0;
virtual void order_inodes_by_similarity() = 0;
virtual void number_inodes(size_t first_no) = 0;
virtual void for_each_inode(
std::function<void(std::shared_ptr<inode> const&)> const& fn) const = 0;
inode_manager(logger& lgr);
std::shared_ptr<inode> create_inode() { return impl_->create_inode(); }
size_t count() const { return impl_->count(); }
void order_inodes(std::shared_ptr<script> scr, file_order_mode file_order,
uint32_t first_inode, inode_cb const& fn) {
impl_->order_inodes(std::move(scr), file_order, first_inode, fn);
}
void for_each_inode(inode_cb const& fn) const { impl_->for_each_inode(fn); }
/**
 * Abstract backend of inode_manager.
 *
 * The public inode_manager methods forward to an instance of this interface
 * held in `impl_`.
 */
class impl {
 public:
  virtual ~impl() = default;

  /// Create a new inode and return a shared handle to it.
  virtual std::shared_ptr<inode> create_inode() = 0;

  /// Number of inodes created so far.
  virtual size_t count() const = 0;

  /// Order the inodes according to `file_order`, numbering them starting at
  /// `first_inode`, and hand each inode to `fn`.
  virtual void
  order_inodes(std::shared_ptr<script> scr, file_order_mode file_order,
               uint32_t first_inode, inode_cb const& fn) = 0;

  /// Invoke `fn` for every inode.
  virtual void for_each_inode(inode_cb const& fn) const = 0;
};
private:
std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

View File

@ -46,7 +46,14 @@ struct filesystem_options {
metadata_options metadata;
};
enum class file_order_mode { NONE, PATH, SCRIPT, SIMILARITY };
/// Selects which content hashes are computed when an inode is scanned.
struct inode_options {
  /// Compute the 32-bit similarity hash.
  bool with_similarity = false;

  /// Compute the nilsimsa similarity hash.
  bool with_nilsimsa = false;

  /// True if at least one hash is requested, i.e. inodes need to be scanned.
  bool needs_scan() const {
    if (with_similarity) {
      return true;
    }
    return with_nilsimsa;
  }
};
enum class file_order_mode { NONE, PATH, SCRIPT, SIMILARITY, NILSIMSA };
struct scanner_options {
file_order_mode file_order{file_order_mode::NONE};
@ -54,6 +61,7 @@ struct scanner_options {
std::optional<uint16_t> gid;
std::optional<uint64_t> timestamp;
bool remove_empty_dirs{false};
inode_options inode;
};
std::ostream& operator<<(std::ostream& os, file_order_mode mode);

View File

@ -32,9 +32,10 @@
#include "dwarfs/global_entry_data.h"
#include "dwarfs/inode.h"
#include "dwarfs/mmif.h"
#include "dwarfs/nilsimsa.h"
#include "dwarfs/options.h"
#include "dwarfs/os_access.h"
#include "dwarfs/progress.h"
#include "dwarfs/similarity.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
@ -168,10 +169,6 @@ void file::scan(os_access& os, progress& prog) {
auto mm = os.map_file(path(), s);
::SHA1(mm->as<unsigned char>(), s,
reinterpret_cast<unsigned char*>(&hash_[0]));
if (with_similarity_) {
similarity_hash_ = get_similarity_hash(mm->as<uint8_t>(), s);
}
} else {
::SHA1(nullptr, 0, reinterpret_cast<unsigned char*>(&hash_[0]));
}
@ -290,9 +287,6 @@ uint64_t device::device_id() const { return status().st_rdev; }
class entry_factory_ : public entry_factory {
public:
entry_factory_(bool with_similarity)
: with_similarity_(with_similarity) {}
std::shared_ptr<entry> create(os_access& os, const std::string& name,
std::shared_ptr<entry> parent) override {
const std::string& p = parent ? parent->path() + "/" + name : name;
@ -302,8 +296,7 @@ class entry_factory_ : public entry_factory {
auto mode = st.st_mode;
if (S_ISREG(mode)) {
return std::make_shared<file>(name, std::move(parent), st,
with_similarity_);
return std::make_shared<file>(name, std::move(parent), st);
} else if (S_ISDIR(mode)) {
return std::make_shared<dir>(name, std::move(parent), st);
} else if (S_ISLNK(mode)) {
@ -317,12 +310,9 @@ class entry_factory_ : public entry_factory {
return std::shared_ptr<entry>();
}
private:
const bool with_similarity_;
};
std::unique_ptr<entry_factory> entry_factory::create(bool with_similarity) {
return std::make_unique<entry_factory_>(with_similarity);
// Returns a new instance of the file-local entry_factory_ implementation.
// The factory takes no options.
std::unique_ptr<entry_factory> entry_factory::create() {
return std::make_unique<entry_factory_>();
}
} // namespace dwarfs

View File

@ -20,7 +20,9 @@
*/
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <deque>
#include <limits>
#include <numeric>
#include <stdexcept>
@ -30,64 +32,125 @@
#include "dwarfs/entry.h"
#include "dwarfs/inode.h"
#include "dwarfs/inode_manager.h"
#include "dwarfs/logger.h"
#include "dwarfs/mmif.h"
#include "dwarfs/nilsimsa.h"
#include "dwarfs/os_access.h"
#include "dwarfs/script.h"
#include "dwarfs/similarity.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
namespace dwarfs {
class inode_manager_ : public inode_manager {
private:
class inode_ : public inode {
public:
using chunk_type = thrift::metadata::chunk;
void set_num(uint32_t num) override { num_ = num; }
uint32_t num() const override { return num_; }
uint32_t similarity_hash() const override {
if (files_.empty()) {
throw std::runtime_error("inode has no file");
}
return files_.front()->similarity_hash();
}
void set_files(files_vector&& fv) override {
if (!files_.empty()) {
throw std::runtime_error("files already set for inode");
}
files_ = std::move(fv);
}
void add_chunk(size_t block, size_t offset, size_t size) override {
chunk_type c;
c.block = block;
c.offset = offset;
c.size = size;
chunks_.push_back(c);
}
size_t size() const override { return any()->size(); }
files_vector const& files() const override { return files_; }
file const* any() const override {
if (files_.empty()) {
throw std::runtime_error("inode has no file");
}
return files_.front();
}
void append_chunks_to(std::vector<chunk_type>& vec) const override {
vec.insert(vec.end(), chunks_.begin(), chunks_.end());
}
private:
uint32_t num_{std::numeric_limits<uint32_t>::max()};
files_vector files_;
std::vector<chunk_type> chunks_;
};
namespace {
/**
 * Concrete inode implementation.
 *
 * Holds the files that share this inode, the inode number, the chunks the
 * inode's data has been stored in, and the optional similarity hashes
 * computed by scan().
 */
class inode_ : public inode {
 public:
  using chunk_type = thrift::metadata::chunk;

  void set_num(uint32_t num) override { num_ = num; }

  uint32_t num() const override { return num_; }

  /// Similarity hash as computed by scan(); stays 0 if scan() did not
  /// compute it. Throws std::runtime_error if no files have been set.
  uint32_t similarity_hash() const override {
    if (files_.empty()) {
      throw std::runtime_error("inode has no file");
    }
    return similarity_hash_;
  }

  /// Nilsimsa hash as computed by scan(); stays empty if scan() did not
  /// compute it. Throws std::runtime_error if no files have been set.
  std::vector<uint64_t> const& nilsimsa_similarity_hash() const override {
    if (files_.empty()) {
      throw std::runtime_error("inode has no file");
    }
    return nilsimsa_similarity_hash_;
  }

  /// Assign the files of this inode. May only be called once with a
  /// non-empty vector; throws std::runtime_error otherwise.
  void set_files(files_vector&& fv) override {
    if (!files_.empty()) {
      throw std::runtime_error("files already set for inode");
    }
    files_ = std::move(fv);
  }

  /// Compute the hashes requested in `opts` from the contents of the first
  /// file of this inode. Does nothing if no hash is requested or if the file
  /// is empty. Throws std::runtime_error if a scan is requested but no files
  /// have been set.
  void scan(os_access& os, inode_options const& opts) override {
    if (!opts.needs_scan()) {
      return;
    }

    // Go through any() instead of files_.front(): front() on an empty
    // container is undefined behaviour, whereas any() reports the problem
    // the same way all other accessors of this class do.
    auto const* f = any();
    auto const size = f->size();

    if (size == 0) {
      // nothing to map or hash; the hashes keep their default values
      return;
    }

    auto mm = os.map_file(f->path(), size);
    auto data = mm->as<uint8_t>();

    if (opts.with_similarity) {
      similarity_hash_ = get_similarity_hash(data, size);
    }

    if (opts.with_nilsimsa) {
      nilsimsa_similarity_hash_ = nilsimsa_compute_hash(data, size);
    }
  }

  /// Record that `size` bytes of this inode are stored at `offset` in
  /// block number `block`.
  void add_chunk(size_t block, size_t offset, size_t size) override {
    chunk_type c;
    c.block = block;
    c.offset = offset;
    c.size = size;
    chunks_.push_back(c);
  }

  /// Size of the inode, taken from its first file.
  size_t size() const override { return any()->size(); }

  files_vector const& files() const override { return files_; }

  /// First file of this inode. Throws std::runtime_error if no files have
  /// been set.
  file const* any() const override {
    if (files_.empty()) {
      throw std::runtime_error("inode has no file");
    }
    return files_.front();
  }

  /// Append all chunks recorded via add_chunk() to `vec`.
  void append_chunks_to(std::vector<chunk_type>& vec) const override {
    vec.insert(vec.end(), chunks_.begin(), chunks_.end());
  }

 private:
  // max() marks an inode that has not been numbered yet
  uint32_t num_{std::numeric_limits<uint32_t>::max()};
  uint32_t similarity_hash_{0};
  files_vector files_;
  std::vector<chunk_type> chunks_;
  std::vector<uint64_t> nilsimsa_similarity_hash_;
};
/**
 * Per-inode record used while ordering inodes by nilsimsa similarity.
 *
 * Caches the inode's size, path and a pointer to its nilsimsa hash next to
 * the inode itself, together with the similarity to the current reference
 * inode. `hash` points into the hash vector owned by the inode, so it is
 * only valid as long as that inode is alive.
 */
class nilsimsa_cache_entry {
 public:
  explicit nilsimsa_cache_entry(std::shared_ptr<inode> i)
      : size(i->size())
      , hash(i->nilsimsa_similarity_hash().data())
      , path(i->any()->path())
      , ino(std::move(i)) {
    assert(hash);
    // data() of an empty vector is not guaranteed to be null, so the pointer
    // check alone cannot detect an inode whose hash was never computed
    assert(!ino->nilsimsa_similarity_hash().empty());
  }

  int similarity{0};
  uint64_t const size;
  uint64_t const* const hash;
  std::string const path;
  // Must remain the last data member: members are initialized in declaration
  // order and the constructor moves from `i` when initializing `ino`, so
  // everything that still reads `i` has to be declared before it.
  std::shared_ptr<inode> ino;
};
} // namespace
template <typename LoggerPolicy>
class inode_manager_ : public inode_manager::impl {
public:
inode_manager_(logger& lgr)
: log_(lgr) {}
std::shared_ptr<inode> create_inode() override {
auto ino = std::make_shared<inode_>();
inodes_.push_back(ino);
@ -96,11 +159,59 @@ class inode_manager_ : public inode_manager {
size_t count() const override { return inodes_.size(); }
void order_inodes(std::shared_ptr<script> scr) override {
scr->order(inodes_);
/**
 * Order all inodes according to `file_order`, number them consecutively
 * starting at `first_inode` and pass each one to `fn` in its final order.
 *
 * For NILSIMSA, numbering and the `fn` callback are performed by
 * order_inodes_by_nilsimsa() itself while it is still ordering; for all
 * other modes they happen here once ordering is complete.
 *
 * \param scr         script used for SCRIPT ordering; not used by this
 *                    function for any other mode
 * \param file_order  ordering mode
 * \param first_inode number assigned to the first inode
 * \param fn          callback invoked once per inode
 *
 * \throws std::runtime_error if SCRIPT ordering is requested but `scr` is
 *         null or reports that it cannot order inodes
 */
void order_inodes(std::shared_ptr<script> scr, file_order_mode file_order,
                  uint32_t first_inode,
                  inode_manager::inode_cb const& fn) override {
  switch (file_order) {
  case file_order_mode::NONE:
    log_.info() << "keeping inode order";
    break;

  case file_order_mode::PATH: {
    log_.info() << "ordering " << count() << " inodes by path name...";
    auto ti = log_.timed_info();
    order_inodes_by_path();
    ti << count() << " inodes ordered";
    break;
  }

  case file_order_mode::SCRIPT: {
    // a missing script cannot order anything either; report it instead of
    // dereferencing a null pointer
    if (!scr || !scr->has_order()) {
      throw std::runtime_error("script cannot order inodes");
    }
    log_.info() << "ordering " << count() << " inodes using script...";
    auto ti = log_.timed_info();
    scr->order(inodes_);
    ti << count() << " inodes ordered";
    break;
  }

  case file_order_mode::SIMILARITY: {
    log_.info() << "ordering " << count() << " inodes by similarity...";
    auto ti = log_.timed_info();
    order_inodes_by_similarity();
    ti << count() << " inodes ordered";
    break;
  }

  case file_order_mode::NILSIMSA: {
    log_.info() << "ordering " << count()
                << " inodes using nilsimsa similarity...";
    auto ti = log_.timed_info();
    order_inodes_by_nilsimsa(fn, first_inode);
    ti << count() << " inodes ordered";
    break;
  }
  }

  // nilsimsa ordering has already numbered the inodes and called `fn`
  if (file_order != file_order_mode::NILSIMSA) {
    log_.info() << "assigning file inodes...";
    number_inodes(first_inode);
    for_each_inode(fn);
  }
}
void order_inodes() override {
void order_inodes_by_path() {
std::vector<std::string> paths;
std::vector<size_t> index(inodes_.size());
@ -125,7 +236,7 @@ class inode_manager_ : public inode_manager {
inodes_.swap(tmp);
}
void order_inodes_by_similarity() override {
void order_inodes_by_similarity() {
std::sort(
inodes_.begin(), inodes_.end(),
[](const std::shared_ptr<inode>& a, const std::shared_ptr<inode>& b) {
@ -138,7 +249,131 @@ class inode_manager_ : public inode_manager {
});
}
void number_inodes(size_t first_no) override {
/**
 * Order inodes by nilsimsa similarity, numbering each inode and passing it
 * to `fn` as soon as its position is final.
 *
 * Empty inodes come first, followed by the largest inode. After that, the
 * remaining inodes are repeatedly ranked by similarity to the most recently
 * emitted inode (ties broken by larger size, then by path) and the best
 * matches are emitted.
 *
 * \param fn        callback invoked for each inode once it has been numbered
 * \param inode_no  number assigned to the first emitted inode
 *
 * \throws std::runtime_error if the number of inodes changed while ordering
 */
void order_inodes_by_nilsimsa(inode_manager::inode_cb const& fn,
                              uint32_t inode_no) {
  // When more than `max_depth` inodes remain, only a window at the front of
  // the index is sorted. The window grows in steps of `depth_step` up to
  // `max_depth`, or until similarity has dropped by more than
  // `sim_thresh_depth` relative to the best match.
  constexpr int sim_thresh_depth = 16;
  // maximum similarity difference to the best match for inodes that are
  // emitted without re-ranking
  constexpr int sim_thresh = 0;
  constexpr size_t max_depth = 2000;
  constexpr size_t depth_step = 500;

  auto finalize_inode = [&](auto& ino) {
    ino->set_num(inode_no++);
    fn(ino);
  };

  auto const count = inodes_.size();

  // skip all empty inodes (this is at most one)
  auto beg = std::partition(inodes_.begin(), inodes_.end(),
                            [](auto const& p) { return p->size() == 0; });

  for (auto it = inodes_.begin(); it != beg; ++it) {
    finalize_inode(*it);
  }

  // Nothing left to order if there are no non-empty inodes; dereferencing
  // `beg` or forming `beg + 1` below would be undefined behaviour.
  if (beg == inodes_.end()) {
    return;
  }

  // find the largest inode
  std::nth_element(beg, beg, inodes_.end(), [](auto const& a, auto const& b) {
    return (a->size() > b->size() ||
            (a->size() == b->size() && a->any()->path() < b->any()->path()));
  });

  finalize_inode(*beg);

  // build a cache for the remaining inodes
  std::vector<nilsimsa_cache_entry> cache;
  std::deque<uint32_t> index;
  index.resize(std::distance(beg + 1, inodes_.end()));
  std::iota(index.begin(), index.end(), 0);
  cache.reserve(index.size());

  for (auto it = beg + 1; it != inodes_.end(); ++it) {
    cache.emplace_back(std::move(*it));
  }

  assert(index.size() == cache.size());

  // and temporarily remove from the original array
  inodes_.erase(beg + 1, inodes_.end());

  // most similar first; ties are broken by larger size, then by path
  auto cmp = [&cache](uint32_t a, uint32_t b) {
    auto& da = cache[a];
    auto& db = cache[b];
    return da.similarity > db.similarity ||
           (da.similarity == db.similarity &&
            (da.size > db.size ||
             (da.size == db.size && da.path < db.path)));
  };

  while (!index.empty()) {
    // compare reference inode with all remaining inodes
    auto* ref_hash = inodes_.back()->nilsimsa_similarity_hash().data();
    for (auto& d : cache) {
      d.similarity = dwarfs::nilsimsa_similarity(ref_hash, d.hash);
    }

    // number of sorted candidates at the front of `index`
    size_t depth = 0;
    // candidates are consumed until only this many are left
    size_t depth_thresh = 0;

    if (index.size() > max_depth) {
      while (depth < max_depth && depth + depth_step < index.size()) {
        std::partial_sort(index.begin() + depth,
                          index.begin() + depth + depth_step, index.end(),
                          cmp);
        depth += depth_step;
        if (cache[index[0]].similarity - cache[index[depth - 1]].similarity >
            sim_thresh_depth) {
          // shrink the window back to where similarity is still in range
          do {
            --depth;
          } while (cache[index[0]].similarity -
                       cache[index[depth - 1]].similarity >
                   sim_thresh_depth);
          break;
        }
      }
      depth_thresh = depth / 2;
    } else {
      std::sort(index.begin(), index.end(), cmp);
      depth = index.size();
    }

    // Emit the best candidate and every following candidate whose similarity
    // is within `sim_thresh` of it, as long as the window is not used up.
    auto emit_similar = [&] {
      auto const sim = cache[index.front()].similarity;
      while (!index.empty() && depth > depth_thresh &&
             sim - cache[index.front()].similarity <= sim_thresh) {
        inodes_.push_back(std::move(cache[index.front()].ino));
        finalize_inode(inodes_.back());
        index.pop_front();
        --depth;
      }
    };

    emit_similar();

    while (depth > depth_thresh) {
      // re-rank the rest of the window against the inode emitted last
      ref_hash = inodes_.back()->nilsimsa_similarity_hash().data();
      for (size_t i = 0; i < depth; ++i) {
        cache[index[i]].similarity =
            dwarfs::nilsimsa_similarity(ref_hash, cache[index[i]].hash);
      }

      std::partial_sort(index.begin(), index.begin() + (depth - depth_thresh),
                        index.begin() + depth, cmp);

      emit_similar();
    }
  }

  if (count != inodes_.size()) {
    throw std::runtime_error("internal error: nilsimsa ordering failed");
  }
}
void number_inodes(size_t first_no) {
for (auto& i : inodes_) {
i->set_num(first_no++);
}
@ -154,9 +389,11 @@ class inode_manager_ : public inode_manager {
private:
std::vector<std::shared_ptr<inode>> inodes_;
log_proxy<LoggerPolicy> log_;
};
std::unique_ptr<inode_manager> inode_manager::create() {
return std::make_unique<inode_manager_>();
}
// Constructs the manager by creating an inode_manager_ implementation for
// the given logger and storing it in impl_.
// NOTE(review): make_unique_logging_object presumably selects the
// LoggerPolicy template argument from logger_policies based on `lgr` --
// confirm against its definition.
inode_manager::inode_manager(logger& lgr)
: impl_(make_unique_logging_object<impl, inode_manager_, logger_policies>(
lgr)) {}
} // namespace dwarfs

View File

@ -44,6 +44,9 @@ std::ostream& operator<<(std::ostream& os, file_order_mode mode) {
case file_order_mode::SIMILARITY:
modestr = "similarity";
break;
case file_order_mode::NILSIMSA:
modestr = "nilsimsa";
break;
default:
break;
}

View File

@ -37,6 +37,8 @@
#include <folly/ExceptionString.h>
#include <fmt/format.h>
#include "dwarfs/entry.h"
#include "dwarfs/filesystem_writer.h"
#include "dwarfs/global_entry_data.h"
@ -93,7 +95,8 @@ class file_deduplication_visitor : public visitor_base {
public:
void visit(file* p) override { hash_[p->hash()].push_back(p); }
void deduplicate_files(inode_manager& im, progress& prog) {
void deduplicate_files(worker_group& wg, os_access& os, inode_manager& im,
inode_options const& ino_opts, progress& prog) {
for (auto& p : hash_) {
auto& files = p.second;
@ -115,6 +118,10 @@ class file_deduplication_visitor : public visitor_base {
}
inode->set_files(std::move(files));
if (ino_opts.needs_scan()) {
wg.add_job([&, inode] { inode->scan(os, ino_opts); });
}
}
}
@ -278,7 +285,6 @@ class scanner_ : public scanner::impl {
private:
std::shared_ptr<entry> scan_tree(const std::string& path, progress& prog);
void order_files(inode_manager& im);
const block_manager::config& cfg_;
const scanner_options& options_;
@ -409,42 +415,6 @@ scanner_<LoggerPolicy>::scan_tree(const std::string& path, progress& prog) {
return root;
}
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::order_files(inode_manager& im) {
switch (options_.file_order) {
case file_order_mode::NONE:
log_.info() << "keeping inode order";
break;
case file_order_mode::PATH: {
log_.info() << "ordering " << im.count() << " inodes by path name...";
auto ti = log_.timed_info();
im.order_inodes();
ti << im.count() << " inodes ordered";
break;
}
case file_order_mode::SCRIPT: {
if (!script_->has_order()) {
throw std::runtime_error("script cannot order inodes");
}
log_.info() << "ordering " << im.count() << " inodes using script...";
auto ti = log_.timed_info();
im.order_inodes(script_);
ti << im.count() << " inodes ordered";
break;
}
case file_order_mode::SIMILARITY: {
log_.info() << "ordering " << im.count() << " inodes by similarity...";
auto ti = log_.timed_info();
im.order_inodes_by_similarity();
ti << im.count() << " inodes ordered";
break;
}
}
}
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
const std::string& path, progress& prog) {
@ -479,22 +449,22 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "finding duplicate files...";
auto im = inode_manager::create();
inode_manager im(lgr_);
file_deduplication_visitor fdv;
root->accept(fdv);
fdv.deduplicate_files(*im, prog);
fdv.deduplicate_files(wg_, *os_, im, options_.inode, prog);
log_.info() << "saved " << size_with_unit(prog.saved_by_deduplication)
<< " / " << size_with_unit(prog.original_size) << " in "
<< prog.duplicate_files << "/" << prog.files_found
<< " duplicate files";
order_files(*im);
log_.info() << "assigning file inodes...";
im->number_inodes(first_file_inode);
if (options_.inode.needs_scan()) {
log_.info() << "waiting for inode scanners...";
wg_.wait();
}
global_entry_data ge_data(options_);
thrift::metadata::metadata mv2;
@ -502,7 +472,7 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
mv2.link_index.resize(first_file_inode - first_link_inode);
log_.info() << "assigning device inodes...";
uint32_t first_device_inode = first_file_inode + im->count();
uint32_t first_device_inode = first_file_inode + im.count();
device_set_inode_visitor devsiv(first_device_inode);
root->accept(devsiv);
mv2.devices_ref() = std::move(devsiv.device_ids());
@ -534,11 +504,12 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "building blocks...";
block_manager bm(lgr_, prog, cfg_, os_, fsw);
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
prog.current.store(ino.get());
bm.add_inode(ino);
prog.inodes_written++;
});
im.order_inodes(script_, options_.file_order, first_file_inode,
[&](std::shared_ptr<inode> const& ino) {
prog.current.store(ino.get());
bm.add_inode(ino);
prog.inodes_written++;
});
log_.info() << "waiting for block compression to finish...";
@ -567,19 +538,19 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
root->set_name(std::string());
log_.info() << "saving chunks...";
mv2.chunk_index.resize(im->count() + 1);
mv2.chunk_index.resize(im.count() + 1);
// TODO: we should be able to start this once all blocks have been
// submitted for compression
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
im.for_each_inode([&](std::shared_ptr<inode> const& ino) {
mv2.chunk_index.at(ino->num() - first_file_inode) = mv2.chunks.size();
ino->append_chunks_to(mv2.chunks);
});
// insert dummy inode to help determine number of chunks per inode
mv2.chunk_index.at(im->count()) = mv2.chunks.size();
mv2.chunk_index.at(im.count()) = mv2.chunks.size();
log_.debug() << "total number of file inodes: " << im->count();
log_.debug() << "total number of file inodes: " << im.count();
log_.debug() << "total number of chunks: " << mv2.chunks.size();
log_.info() << "saving directories...";

View File

@ -95,7 +95,8 @@ const std::map<std::string, file_order_mode> order_choices{
#ifdef DWARFS_HAVE_PYTHON
{"script", file_order_mode::SCRIPT},
#endif
{"similarity", file_order_mode::SIMILARITY}};
{"similarity", file_order_mode::SIMILARITY},
{"nilsimsa", file_order_mode::NILSIMSA}};
} // namespace
@ -555,10 +556,12 @@ int mkdwarfs(int argc, char** argv) {
wg_writer.wait();
ti << "filesystem rewritten";
} else {
scanner s(lgr, wg_scanner, cfg,
entry_factory::create(force_similarity ||
options.file_order ==
file_order_mode::SIMILARITY),
options.inode.with_similarity =
force_similarity || options.file_order == file_order_mode::SIMILARITY;
options.inode.with_nilsimsa =
options.file_order == file_order_mode::NILSIMSA;
scanner s(lgr, wg_scanner, cfg, entry_factory::create(),
std::make_shared<os_access_posix>(), std::move(script), options);
{

View File

@ -199,6 +199,8 @@ void basic_end_to_end_test(const std::string& compressor,
cfg.block_size_bits = block_size_bits;
options.file_order = file_order;
options.inode.with_similarity = file_order == file_order_mode::SIMILARITY;
options.inode.with_nilsimsa = file_order == file_order_mode::NILSIMSA;
// force multithreading
worker_group wg("writer", 4);
@ -207,8 +209,7 @@ void basic_end_to_end_test(const std::string& compressor,
stream_logger lgr(logss); // TODO: mock
lgr.set_policy<prod_logger_policy>();
scanner s(lgr, wg, cfg,
entry_factory::create(file_order == file_order_mode::SIMILARITY),
scanner s(lgr, wg, cfg, entry_factory::create(),
std::make_shared<test::os_access_mock>(),
std::make_shared<test::script_mock>(), options);
@ -323,9 +324,8 @@ TEST_P(basic, end_to_end) {
INSTANTIATE_TEST_SUITE_P(
dwarfs, basic,
::testing::Combine(::testing::ValuesIn(compressions),
::testing::Values(12, 15, 20, 28),
::testing::Values(file_order_mode::NONE,
file_order_mode::PATH,
file_order_mode::SCRIPT,
file_order_mode::SIMILARITY)));
::testing::Combine(
::testing::ValuesIn(compressions), ::testing::Values(12, 15, 20, 28),
::testing::Values(file_order_mode::NONE, file_order_mode::PATH,
file_order_mode::SCRIPT, file_order_mode::NILSIMSA,
file_order_mode::SIMILARITY)));