mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 20:41:04 -04:00
add segmenter_factory
This commit is contained in:
parent
8fa157bf92
commit
aacb9a0d94
@ -399,6 +399,7 @@ list(
|
|||||||
src/dwarfs/safe_main.cpp
|
src/dwarfs/safe_main.cpp
|
||||||
src/dwarfs/scanner.cpp
|
src/dwarfs/scanner.cpp
|
||||||
src/dwarfs/segmenter.cpp
|
src/dwarfs/segmenter.cpp
|
||||||
|
src/dwarfs/segmenter_factory.cpp
|
||||||
src/dwarfs/similarity.cpp
|
src/dwarfs/similarity.cpp
|
||||||
src/dwarfs/similarity_ordering.cpp
|
src/dwarfs/similarity_ordering.cpp
|
||||||
src/dwarfs/string_table.cpp
|
src/dwarfs/string_table.cpp
|
||||||
|
48
include/dwarfs/categorized_option.h
Normal file
48
include/dwarfs/categorized_option.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "dwarfs/contextual_option.h"
|
||||||
|
#include "dwarfs/fragment_category.h"
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct categorized_option_policy {
|
||||||
|
using ContextArgumentType = fragment_category;
|
||||||
|
using ContextType = fragment_category::value_type;
|
||||||
|
using ValueType = T;
|
||||||
|
|
||||||
|
static ContextType context_from_arg(ContextArgumentType const& arg) {
|
||||||
|
return arg.value();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
template <typename ValueType>
|
||||||
|
using categorized_option =
|
||||||
|
contextual_option<detail::categorized_option_policy<ValueType>>;
|
||||||
|
|
||||||
|
} // namespace dwarfs
|
@ -28,9 +28,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
#include "dwarfs/contextual_option.h"
|
#include "dwarfs/categorized_option.h"
|
||||||
#include "dwarfs/file_stat.h"
|
#include "dwarfs/file_stat.h"
|
||||||
#include "dwarfs/fragment_category.h"
|
|
||||||
#include "dwarfs/types.h"
|
#include "dwarfs/types.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -38,25 +37,6 @@ namespace dwarfs {
|
|||||||
class categorizer_manager;
|
class categorizer_manager;
|
||||||
class entry;
|
class entry;
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct categorized_option_policy {
|
|
||||||
using ContextArgumentType = fragment_category;
|
|
||||||
using ContextType = fragment_category::value_type;
|
|
||||||
using ValueType = T;
|
|
||||||
|
|
||||||
static ContextType context_from_arg(ContextArgumentType const& arg) {
|
|
||||||
return arg.value();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
template <typename ValueType>
|
|
||||||
using categorized_option =
|
|
||||||
contextual_option<detail::categorized_option_policy<ValueType>>;
|
|
||||||
|
|
||||||
enum class mlock_mode { NONE, TRY, MUST };
|
enum class mlock_mode { NONE, TRY, MUST };
|
||||||
|
|
||||||
enum class cache_tidy_strategy { NONE, EXPIRY_TIME, BLOCK_SWAPPED_OUT };
|
enum class cache_tidy_strategy { NONE, EXPIRY_TIME, BLOCK_SWAPPED_OUT };
|
||||||
|
@ -27,8 +27,6 @@
|
|||||||
#include <span>
|
#include <span>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "dwarfs/segmenter.h"
|
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
struct scanner_options;
|
struct scanner_options;
|
||||||
@ -39,11 +37,12 @@ class logger;
|
|||||||
class os_access;
|
class os_access;
|
||||||
class progress;
|
class progress;
|
||||||
class script;
|
class script;
|
||||||
|
class segmenter_factory;
|
||||||
class worker_group;
|
class worker_group;
|
||||||
|
|
||||||
class scanner {
|
class scanner {
|
||||||
public:
|
public:
|
||||||
scanner(logger& lgr, worker_group& wg, const segmenter::config& cfg,
|
scanner(logger& lgr, worker_group& wg, std::shared_ptr<segmenter_factory> sf,
|
||||||
std::shared_ptr<entry_factory> ef, std::shared_ptr<os_access> os,
|
std::shared_ptr<entry_factory> ef, std::shared_ptr<os_access> os,
|
||||||
std::shared_ptr<script> scr, const scanner_options& options);
|
std::shared_ptr<script> scr, const scanner_options& options);
|
||||||
|
|
||||||
|
@ -38,12 +38,11 @@ class progress;
|
|||||||
class segmenter {
|
class segmenter {
|
||||||
public:
|
public:
|
||||||
struct config {
|
struct config {
|
||||||
unsigned blockhash_window_size;
|
unsigned blockhash_window_size{12};
|
||||||
unsigned window_increment_shift{1};
|
unsigned window_increment_shift{1};
|
||||||
size_t max_active_blocks{1};
|
size_t max_active_blocks{1};
|
||||||
size_t memory_limit{256 << 20};
|
|
||||||
unsigned block_size_bits{22};
|
|
||||||
unsigned bloom_filter_size{4};
|
unsigned bloom_filter_size{4};
|
||||||
|
unsigned block_size_bits{22};
|
||||||
};
|
};
|
||||||
|
|
||||||
using block_ready_cb = folly::Function<size_t(std::shared_ptr<block_data>)>;
|
using block_ready_cb = folly::Function<size_t(std::shared_ptr<block_data>)>;
|
||||||
|
66
include/dwarfs/segmenter_factory.h
Normal file
66
include/dwarfs/segmenter_factory.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "dwarfs/categorized_option.h"
|
||||||
|
#include "dwarfs/segmenter.h"
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
|
||||||
|
class logger;
|
||||||
|
class progress;
|
||||||
|
|
||||||
|
class segmenter_factory {
|
||||||
|
public:
|
||||||
|
struct config {
|
||||||
|
categorized_option<unsigned> blockhash_window_size;
|
||||||
|
categorized_option<unsigned> window_increment_shift;
|
||||||
|
categorized_option<size_t> max_active_blocks;
|
||||||
|
categorized_option<unsigned> bloom_filter_size;
|
||||||
|
unsigned block_size_bits{22};
|
||||||
|
};
|
||||||
|
|
||||||
|
segmenter_factory(logger& lgr, progress& prog, config const& cfg);
|
||||||
|
|
||||||
|
segmenter create(fragment_category cat, std::shared_ptr<block_manager> blkmgr,
|
||||||
|
segmenter::block_ready_cb block_ready) const {
|
||||||
|
return impl_->create(cat, std::move(blkmgr), std::move(block_ready));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_block_size() const { return impl_->get_block_size(); }
|
||||||
|
|
||||||
|
class impl {
|
||||||
|
public:
|
||||||
|
virtual ~impl() = default;
|
||||||
|
|
||||||
|
virtual segmenter
|
||||||
|
create(fragment_category cat, std::shared_ptr<block_manager> blkmgr,
|
||||||
|
segmenter::block_ready_cb block_ready) const = 0;
|
||||||
|
virtual size_t get_block_size() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<impl> impl_;
|
||||||
|
};
|
||||||
|
} // namespace dwarfs
|
@ -57,6 +57,7 @@
|
|||||||
#include "dwarfs/progress.h"
|
#include "dwarfs/progress.h"
|
||||||
#include "dwarfs/scanner.h"
|
#include "dwarfs/scanner.h"
|
||||||
#include "dwarfs/script.h"
|
#include "dwarfs/script.h"
|
||||||
|
#include "dwarfs/segmenter_factory.h"
|
||||||
#include "dwarfs/string_table.h"
|
#include "dwarfs/string_table.h"
|
||||||
#include "dwarfs/util.h"
|
#include "dwarfs/util.h"
|
||||||
#include "dwarfs/version.h"
|
#include "dwarfs/version.h"
|
||||||
@ -273,7 +274,7 @@ std::string status_string(progress const& p, size_t width) {
|
|||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
class scanner_ final : public scanner::impl {
|
class scanner_ final : public scanner::impl {
|
||||||
public:
|
public:
|
||||||
scanner_(logger& lgr, worker_group& wg, const segmenter::config& config,
|
scanner_(logger& lgr, worker_group& wg, std::shared_ptr<segmenter_factory> sf,
|
||||||
std::shared_ptr<entry_factory> ef, std::shared_ptr<os_access> os,
|
std::shared_ptr<entry_factory> ef, std::shared_ptr<os_access> os,
|
||||||
std::shared_ptr<script> scr, const scanner_options& options);
|
std::shared_ptr<script> scr, const scanner_options& options);
|
||||||
|
|
||||||
@ -297,25 +298,25 @@ class scanner_ final : public scanner::impl {
|
|||||||
|
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
worker_group& wg_;
|
worker_group& wg_;
|
||||||
const segmenter::config& cfg_;
|
scanner_options const& options_;
|
||||||
const scanner_options& options_;
|
std::shared_ptr<segmenter_factory> segmenter_factory_;
|
||||||
std::shared_ptr<entry_factory> entry_;
|
std::shared_ptr<entry_factory> entry_factory_;
|
||||||
std::shared_ptr<os_access> os_;
|
std::shared_ptr<os_access> os_;
|
||||||
std::shared_ptr<script> script_;
|
std::shared_ptr<script> script_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
||||||
const segmenter::config& cfg,
|
std::shared_ptr<segmenter_factory> sf,
|
||||||
std::shared_ptr<entry_factory> ef,
|
std::shared_ptr<entry_factory> ef,
|
||||||
std::shared_ptr<os_access> os,
|
std::shared_ptr<os_access> os,
|
||||||
std::shared_ptr<script> scr,
|
std::shared_ptr<script> scr,
|
||||||
const scanner_options& options)
|
const scanner_options& options)
|
||||||
: LOG_PROXY_INIT(lgr)
|
: LOG_PROXY_INIT(lgr)
|
||||||
, wg_(wg)
|
, wg_{wg}
|
||||||
, cfg_(cfg)
|
, options_{options}
|
||||||
, options_(options)
|
, segmenter_factory_{std::move(sf)}
|
||||||
, entry_(std::move(ef))
|
, entry_factory_{std::move(ef)}
|
||||||
, os_(std::move(os))
|
, os_(std::move(os))
|
||||||
, script_(std::move(scr)) {}
|
, script_(std::move(scr)) {}
|
||||||
|
|
||||||
@ -325,7 +326,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
std::shared_ptr<dir> parent, progress& prog,
|
std::shared_ptr<dir> parent, progress& prog,
|
||||||
detail::file_scanner& fs, bool debug_filter) {
|
detail::file_scanner& fs, bool debug_filter) {
|
||||||
try {
|
try {
|
||||||
auto pe = entry_->create(*os_, name, parent);
|
auto pe = entry_factory_->create(*os_, name, parent);
|
||||||
bool exclude = false;
|
bool exclude = false;
|
||||||
|
|
||||||
if (script_) {
|
if (script_) {
|
||||||
@ -429,7 +430,7 @@ template <typename LoggerPolicy>
|
|||||||
std::shared_ptr<entry>
|
std::shared_ptr<entry>
|
||||||
scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
||||||
progress& prog, detail::file_scanner& fs) {
|
progress& prog, detail::file_scanner& fs) {
|
||||||
auto root = entry_->create(*os_, path);
|
auto root = entry_factory_->create(*os_, path);
|
||||||
bool const debug_filter = options_.debug_filter_function.has_value();
|
bool const debug_filter = options_.debug_filter_function.has_value();
|
||||||
|
|
||||||
if (root->type() != entry::E_DIR) {
|
if (root->type() != entry::E_DIR) {
|
||||||
@ -489,7 +490,7 @@ scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
|
|||||||
|
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
|
||||||
auto root = entry_->create(*os_, path);
|
auto root = entry_factory_->create(*os_, path);
|
||||||
|
|
||||||
if (root->type() != entry::E_DIR) {
|
if (root->type() != entry::E_DIR) {
|
||||||
DWARFS_THROW(runtime_error,
|
DWARFS_THROW(runtime_error,
|
||||||
@ -673,8 +674,6 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
// which gets run on a worker group; each batch keeps track of
|
// which gets run on a worker group; each batch keeps track of
|
||||||
// its CPU time and affects thread naming
|
// its CPU time and affects thread naming
|
||||||
|
|
||||||
// segmenter seg(LOG_GET_LOGGER, prog, cfg_, fsw);
|
|
||||||
|
|
||||||
auto blockmgr = std::make_shared<block_manager>();
|
auto blockmgr = std::make_shared<block_manager>();
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -695,12 +694,11 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
wg_blockify.add_job(
|
wg_blockify.add_job(
|
||||||
[this, catmgr, blockmgr, category, meta, &prog, &fsw,
|
[this, catmgr, blockmgr, category, meta, &prog, &fsw,
|
||||||
span = im.ordered_span(category, wg_ordering)]() mutable {
|
span = im.ordered_span(category, wg_ordering)]() mutable {
|
||||||
// TODO: segmenter config per-category
|
auto seg = segmenter_factory_->create(
|
||||||
segmenter seg(LOG_GET_LOGGER, prog, blockmgr, cfg_,
|
category, blockmgr, [category, meta, &fsw](auto block) {
|
||||||
[category, meta, &fsw](auto block) {
|
return fsw.write_block(category.value(), std::move(block),
|
||||||
return fsw.write_block(category.value(),
|
meta);
|
||||||
std::move(block), meta);
|
});
|
||||||
});
|
|
||||||
|
|
||||||
for (auto ino : span) {
|
for (auto ino : span) {
|
||||||
prog.current.store(ino.get());
|
prog.current.store(ino.get());
|
||||||
@ -844,7 +842,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
mv2.gids() = ge_data.get_gids();
|
mv2.gids() = ge_data.get_gids();
|
||||||
mv2.modes() = ge_data.get_modes();
|
mv2.modes() = ge_data.get_modes();
|
||||||
mv2.timestamp_base() = ge_data.get_timestamp_base();
|
mv2.timestamp_base() = ge_data.get_timestamp_base();
|
||||||
mv2.block_size() = UINT32_C(1) << cfg_.block_size_bits;
|
mv2.block_size() = segmenter_factory_->get_block_size();
|
||||||
mv2.total_fs_size() = prog.original_size;
|
mv2.total_fs_size() = prog.original_size;
|
||||||
mv2.total_hardlink_size() = prog.hardlink_size;
|
mv2.total_hardlink_size() = prog.hardlink_size;
|
||||||
mv2.options() = fsopts;
|
mv2.options() = fsopts;
|
||||||
@ -870,12 +868,13 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
<< ")";
|
<< ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
scanner::scanner(logger& lgr, worker_group& wg, const segmenter::config& cfg,
|
scanner::scanner(logger& lgr, worker_group& wg,
|
||||||
|
std::shared_ptr<segmenter_factory> sf,
|
||||||
std::shared_ptr<entry_factory> ef,
|
std::shared_ptr<entry_factory> ef,
|
||||||
std::shared_ptr<os_access> os, std::shared_ptr<script> scr,
|
std::shared_ptr<os_access> os, std::shared_ptr<script> scr,
|
||||||
const scanner_options& options)
|
const scanner_options& options)
|
||||||
: impl_(make_unique_logging_object<impl, scanner_, logger_policies>(
|
: impl_(make_unique_logging_object<impl, scanner_, logger_policies>(
|
||||||
lgr, wg, cfg, std::move(ef), std::move(os), std::move(scr),
|
lgr, wg, std::move(sf), std::move(ef), std::move(os), std::move(scr),
|
||||||
options)) {}
|
options)) {}
|
||||||
|
|
||||||
} // namespace dwarfs
|
} // namespace dwarfs
|
||||||
|
@ -349,7 +349,7 @@ class segmenter_ final : public segmenter::impl {
|
|||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
progress& prog_;
|
progress& prog_;
|
||||||
std::shared_ptr<block_manager> blkmgr_;
|
std::shared_ptr<block_manager> blkmgr_;
|
||||||
const segmenter::config& cfg_;
|
segmenter::config const cfg_;
|
||||||
segmenter::block_ready_cb block_ready_;
|
segmenter::block_ready_cb block_ready_;
|
||||||
|
|
||||||
size_t const window_size_;
|
size_t const window_size_;
|
||||||
|
62
src/dwarfs/segmenter_factory.cpp
Normal file
62
src/dwarfs/segmenter_factory.cpp
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "dwarfs/segmenter_factory.h"
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
|
||||||
|
class segmenter_factory_ final : public segmenter_factory::impl {
|
||||||
|
public:
|
||||||
|
segmenter_factory_(logger& lgr, progress& prog,
|
||||||
|
const segmenter_factory::config& cfg)
|
||||||
|
: lgr_{lgr}
|
||||||
|
, prog_{prog}
|
||||||
|
, cfg_{cfg} {}
|
||||||
|
|
||||||
|
segmenter create(fragment_category cat, std::shared_ptr<block_manager> blkmgr,
|
||||||
|
segmenter::block_ready_cb block_ready) const override {
|
||||||
|
segmenter::config cfg;
|
||||||
|
|
||||||
|
cfg.blockhash_window_size = cfg_.blockhash_window_size.get(cat);
|
||||||
|
cfg.window_increment_shift = cfg_.window_increment_shift.get(cat);
|
||||||
|
cfg.max_active_blocks = cfg_.max_active_blocks.get(cat);
|
||||||
|
cfg.bloom_filter_size = cfg_.bloom_filter_size.get(cat);
|
||||||
|
cfg.block_size_bits = cfg_.block_size_bits;
|
||||||
|
|
||||||
|
return segmenter(lgr_, prog_, std::move(blkmgr), cfg,
|
||||||
|
std::move(block_ready));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_block_size() const override {
|
||||||
|
return static_cast<size_t>(1) << cfg_.block_size_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
logger& lgr_;
|
||||||
|
progress& prog_;
|
||||||
|
segmenter_factory::config cfg_;
|
||||||
|
};
|
||||||
|
|
||||||
|
segmenter_factory::segmenter_factory(logger& lgr, progress& prog,
|
||||||
|
config const& cfg)
|
||||||
|
: impl_(std::make_unique<segmenter_factory_>(lgr, prog, cfg)) {}
|
||||||
|
|
||||||
|
} // namespace dwarfs
|
@ -72,7 +72,7 @@
|
|||||||
#include "dwarfs/progress.h"
|
#include "dwarfs/progress.h"
|
||||||
#include "dwarfs/scanner.h"
|
#include "dwarfs/scanner.h"
|
||||||
#include "dwarfs/script.h"
|
#include "dwarfs/script.h"
|
||||||
#include "dwarfs/segmenter.h"
|
#include "dwarfs/segmenter_factory.h"
|
||||||
#include "dwarfs/terminal.h"
|
#include "dwarfs/terminal.h"
|
||||||
#include "dwarfs/tool.h"
|
#include "dwarfs/tool.h"
|
||||||
#include "dwarfs/util.h"
|
#include "dwarfs/util.h"
|
||||||
@ -274,7 +274,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
|
|
||||||
const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);
|
const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);
|
||||||
|
|
||||||
segmenter::config cfg;
|
segmenter_factory::config sf_config;
|
||||||
sys_string path_str, output_str;
|
sys_string path_str, output_str;
|
||||||
std::string memory_limit, script_arg, header, schema_compression,
|
std::string memory_limit, script_arg, header, schema_compression,
|
||||||
metadata_compression, log_level_str, timestamp, time_resolution,
|
metadata_compression, log_level_str, timestamp, time_resolution,
|
||||||
@ -283,7 +283,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
categorizer_list_str;
|
categorizer_list_str;
|
||||||
std::vector<sys_string> filter;
|
std::vector<sys_string> filter;
|
||||||
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
||||||
compression;
|
bloom_filter_size, compression;
|
||||||
size_t num_workers, num_scanner_workers;
|
size_t num_workers, num_scanner_workers;
|
||||||
bool no_progress = false, remove_header = false, no_section_index = false,
|
bool no_progress = false, remove_header = false, no_section_index = false,
|
||||||
force_overwrite = false;
|
force_overwrite = false;
|
||||||
@ -294,6 +294,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
integral_value_parser<size_t> max_lookback_parser;
|
integral_value_parser<size_t> max_lookback_parser;
|
||||||
integral_value_parser<unsigned> window_size_parser(6, 24);
|
integral_value_parser<unsigned> window_size_parser(6, 24);
|
||||||
integral_value_parser<unsigned> window_step_parser(0, 8);
|
integral_value_parser<unsigned> window_step_parser(0, 8);
|
||||||
|
integral_value_parser<unsigned> bloom_filter_size_parser(0, 8);
|
||||||
fragment_order_parser order_parser;
|
fragment_order_parser order_parser;
|
||||||
block_compressor_parser compressor_parser;
|
block_compressor_parser compressor_parser;
|
||||||
|
|
||||||
@ -353,7 +354,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
po::options_description advanced_opts("Advanced options");
|
po::options_description advanced_opts("Advanced options");
|
||||||
advanced_opts.add_options()
|
advanced_opts.add_options()
|
||||||
("block-size-bits,S",
|
("block-size-bits,S",
|
||||||
po::value<unsigned>(&cfg.block_size_bits),
|
po::value<unsigned>(&sf_config.block_size_bits),
|
||||||
"block size bits (size = 2^arg bits)")
|
"block size bits (size = 2^arg bits)")
|
||||||
("num-workers,N",
|
("num-workers,N",
|
||||||
po::value<size_t>(&num_workers)->default_value(num_cpu),
|
po::value<size_t>(&num_workers)->default_value(num_cpu),
|
||||||
@ -426,7 +427,8 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
po::value<std::vector<std::string>>(&window_step)->multitoken(),
|
po::value<std::vector<std::string>>(&window_step)->multitoken(),
|
||||||
"window step (as right shift of size)")
|
"window step (as right shift of size)")
|
||||||
("bloom-filter-size",
|
("bloom-filter-size",
|
||||||
po::value<unsigned>(&cfg.bloom_filter_size)->default_value(4),
|
// po::value<unsigned>(&cfg.bloom_filter_size)->default_value(4), // TODO
|
||||||
|
po::value<std::vector<std::string>>(&bloom_filter_size)->multitoken(),
|
||||||
"bloom filter size (2^N*values bits)")
|
"bloom filter size (2^N*values bits)")
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -601,7 +603,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
auto const& defaults = levels[level];
|
auto const& defaults = levels[level];
|
||||||
|
|
||||||
if (!vm.count("block-size-bits")) {
|
if (!vm.count("block-size-bits")) {
|
||||||
cfg.block_size_bits = defaults.block_size_bits;
|
sf_config.block_size_bits = defaults.block_size_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!vm.count("compression")) {
|
if (!vm.count("compression")) {
|
||||||
@ -617,22 +619,23 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!vm.count("max-lookback-blocks")) {
|
if (!vm.count("max-lookback-blocks")) {
|
||||||
cfg.max_active_blocks = 1; // TODO
|
|
||||||
max_lookback_blocks.push_back(folly::to<std::string>(1));
|
max_lookback_blocks.push_back(folly::to<std::string>(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!vm.count("window-size")) {
|
if (!vm.count("window-size")) {
|
||||||
cfg.blockhash_window_size = defaults.window_size; // TODO
|
|
||||||
window_size.push_back(folly::to<std::string>(defaults.window_size));
|
window_size.push_back(folly::to<std::string>(defaults.window_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!vm.count("window-step")) {
|
if (!vm.count("window-step")) {
|
||||||
cfg.window_increment_shift = defaults.window_step; // TODO
|
|
||||||
window_step.push_back(folly::to<std::string>(defaults.window_step));
|
window_step.push_back(folly::to<std::string>(defaults.window_step));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cfg.block_size_bits < min_block_size_bits ||
|
if (!vm.count("bloom-filter-size")) {
|
||||||
cfg.block_size_bits > max_block_size_bits) {
|
bloom_filter_size.push_back(folly::to<std::string>(4));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sf_config.block_size_bits < min_block_size_bits ||
|
||||||
|
sf_config.block_size_bits > max_block_size_bits) {
|
||||||
std::cerr << "error: block size must be between " << min_block_size_bits
|
std::cerr << "error: block size must be between " << min_block_size_bits
|
||||||
<< " and " << max_block_size_bits << "\n";
|
<< " and " << max_block_size_bits << "\n";
|
||||||
return 1;
|
return 1;
|
||||||
@ -923,13 +926,14 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
|
|
||||||
progress prog(std::move(updater), interval_ms);
|
progress prog(std::move(updater), interval_ms);
|
||||||
|
|
||||||
auto min_memory_req = num_workers * (UINT64_C(1) << cfg.block_size_bits);
|
auto min_memory_req =
|
||||||
|
num_workers * (UINT64_C(1) << sf_config.block_size_bits);
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
if (mem_limit < min_memory_req /* && compression != "null" */) {
|
if (mem_limit < min_memory_req /* && compression != "null" */) {
|
||||||
LOG_WARN << "low memory limit (" << size_with_unit(mem_limit) << "), need "
|
LOG_WARN << "low memory limit (" << size_with_unit(mem_limit) << "), need "
|
||||||
<< size_with_unit(min_memory_req) << " to efficiently compress "
|
<< size_with_unit(min_memory_req) << " to efficiently compress "
|
||||||
<< size_with_unit(UINT64_C(1) << cfg.block_size_bits)
|
<< size_with_unit(UINT64_C(1) << sf_config.block_size_bits)
|
||||||
<< " blocks with " << num_workers << " threads";
|
<< " blocks with " << num_workers << " threads";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -976,44 +980,45 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
category_parser cp(options.inode.categorizer_mgr);
|
category_parser cp(options.inode.categorizer_mgr);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
contextual_option_parser cop("--order", options.inode.fragment_order, cp,
|
{
|
||||||
order_parser);
|
contextual_option_parser cop("--order", options.inode.fragment_order, cp,
|
||||||
cop.parse(defaults.order);
|
order_parser);
|
||||||
cop.parse(order);
|
cop.parse(defaults.order);
|
||||||
LOG_DEBUG << cop.as_string();
|
cop.parse(order);
|
||||||
} catch (std::exception const& e) {
|
LOG_DEBUG << cop.as_string();
|
||||||
LOG_ERROR << e.what();
|
}
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
{
|
||||||
categorized_option<size_t> max_lookback_opt;
|
contextual_option_parser cop("--max-lookback-blocks",
|
||||||
contextual_option_parser cop("--max-lookback-blocks", max_lookback_opt, cp,
|
sf_config.max_active_blocks, cp,
|
||||||
max_lookback_parser);
|
max_lookback_parser);
|
||||||
cop.parse(max_lookback_blocks);
|
cop.parse(max_lookback_blocks);
|
||||||
LOG_DEBUG << cop.as_string();
|
LOG_DEBUG << cop.as_string();
|
||||||
} catch (std::exception const& e) {
|
}
|
||||||
LOG_ERROR << e.what();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
{
|
||||||
categorized_option<unsigned> window_size_opt;
|
contextual_option_parser cop("--window-size",
|
||||||
contextual_option_parser cop("--window-size", window_size_opt, cp,
|
sf_config.blockhash_window_size, cp,
|
||||||
window_size_parser);
|
window_size_parser);
|
||||||
cop.parse(window_size);
|
cop.parse(window_size);
|
||||||
LOG_DEBUG << cop.as_string();
|
LOG_DEBUG << cop.as_string();
|
||||||
} catch (std::exception const& e) {
|
}
|
||||||
LOG_ERROR << e.what();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
{
|
||||||
categorized_option<unsigned> window_step_opt;
|
contextual_option_parser cop("--window-step",
|
||||||
contextual_option_parser cop("--window-step", window_step_opt, cp,
|
sf_config.window_increment_shift, cp,
|
||||||
window_step_parser);
|
window_step_parser);
|
||||||
cop.parse(window_step);
|
cop.parse(window_step);
|
||||||
LOG_DEBUG << cop.as_string();
|
LOG_DEBUG << cop.as_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
contextual_option_parser cop("--bloom-filter-size",
|
||||||
|
sf_config.bloom_filter_size, cp,
|
||||||
|
bloom_filter_size_parser);
|
||||||
|
cop.parse(bloom_filter_size);
|
||||||
|
LOG_DEBUG << cop.as_string();
|
||||||
|
}
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
LOG_ERROR << e.what();
|
LOG_ERROR << e.what();
|
||||||
return 1;
|
return 1;
|
||||||
@ -1060,7 +1065,9 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
fsw, rw_opts);
|
fsw, rw_opts);
|
||||||
wg_compress.wait();
|
wg_compress.wait();
|
||||||
} else {
|
} else {
|
||||||
scanner s(lgr, wg_scanner, cfg, entry_factory::create(),
|
auto sf = std::make_shared<segmenter_factory>(lgr, prog, sf_config);
|
||||||
|
|
||||||
|
scanner s(lgr, wg_scanner, sf, entry_factory::create(),
|
||||||
std::make_shared<os_access_generic>(), std::move(script),
|
std::make_shared<os_access_generic>(), std::move(script),
|
||||||
options);
|
options);
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
#include "dwarfs/progress.h"
|
#include "dwarfs/progress.h"
|
||||||
#include "dwarfs/scanner.h"
|
#include "dwarfs/scanner.h"
|
||||||
|
#include "dwarfs/segmenter_factory.h"
|
||||||
#include "dwarfs/vfs_stat.h"
|
#include "dwarfs/vfs_stat.h"
|
||||||
|
|
||||||
#include "filter_test_data.h"
|
#include "filter_test_data.h"
|
||||||
@ -71,15 +72,26 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
|
|||||||
// force multithreading
|
// force multithreading
|
||||||
worker_group wg("worker", 4);
|
worker_group wg("worker", 4);
|
||||||
|
|
||||||
scanner s(lgr, wg, cfg, entry_factory::create(), input, scr, options);
|
|
||||||
|
|
||||||
std::ostringstream oss;
|
|
||||||
std::unique_ptr<progress> local_prog;
|
std::unique_ptr<progress> local_prog;
|
||||||
if (!prog) {
|
if (!prog) {
|
||||||
local_prog = std::make_unique<progress>([](const progress&, bool) {}, 1000);
|
local_prog = std::make_unique<progress>([](const progress&, bool) {}, 1000);
|
||||||
prog = local_prog.get();
|
prog = local_prog.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: ugly hack :-)
|
||||||
|
segmenter_factory::config sf_cfg;
|
||||||
|
sf_cfg.block_size_bits = cfg.block_size_bits;
|
||||||
|
sf_cfg.blockhash_window_size.set_default(cfg.blockhash_window_size);
|
||||||
|
sf_cfg.window_increment_shift.set_default(cfg.window_increment_shift);
|
||||||
|
sf_cfg.max_active_blocks.set_default(cfg.max_active_blocks);
|
||||||
|
sf_cfg.bloom_filter_size.set_default(cfg.bloom_filter_size);
|
||||||
|
|
||||||
|
auto sf = std::make_shared<segmenter_factory>(lgr, *prog, sf_cfg);
|
||||||
|
|
||||||
|
scanner s(lgr, wg, sf, entry_factory::create(), input, scr, options);
|
||||||
|
|
||||||
|
std::ostringstream oss;
|
||||||
|
|
||||||
block_compressor bc(compression);
|
block_compressor bc(compression);
|
||||||
filesystem_writer fsw(oss, lgr, wg, *prog, bc, bc);
|
filesystem_writer fsw(oss, lgr, wg, *prog, bc, bc);
|
||||||
fsw.add_default_compressor(bc);
|
fsw.add_default_compressor(bc);
|
||||||
|
@ -35,7 +35,7 @@
|
|||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
#include "dwarfs/progress.h"
|
#include "dwarfs/progress.h"
|
||||||
#include "dwarfs/scanner.h"
|
#include "dwarfs/scanner.h"
|
||||||
#include "dwarfs/segmenter.h"
|
#include "dwarfs/segmenter_factory.h"
|
||||||
#include "dwarfs/string_table.h"
|
#include "dwarfs/string_table.h"
|
||||||
#include "dwarfs/vfs_stat.h"
|
#include "dwarfs/vfs_stat.h"
|
||||||
#include "dwarfs/worker_group.h"
|
#include "dwarfs/worker_group.h"
|
||||||
@ -91,10 +91,13 @@ void PackParamsDirs(::benchmark::internal::Benchmark* b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string make_filesystem(::benchmark::State const& state) {
|
std::string make_filesystem(::benchmark::State const& state) {
|
||||||
segmenter::config cfg;
|
segmenter_factory::config cfg;
|
||||||
scanner_options options;
|
scanner_options options;
|
||||||
|
|
||||||
cfg.blockhash_window_size = 8;
|
cfg.blockhash_window_size.set_default(12);
|
||||||
|
cfg.window_increment_shift.set_default(1);
|
||||||
|
cfg.max_active_blocks.set_default(1);
|
||||||
|
cfg.bloom_filter_size.set_default(4);
|
||||||
cfg.block_size_bits = 12;
|
cfg.block_size_bits = 12;
|
||||||
|
|
||||||
options.with_devices = true;
|
options.with_devices = true;
|
||||||
@ -112,17 +115,19 @@ std::string make_filesystem(::benchmark::State const& state) {
|
|||||||
options.plain_symlinks_table = state.range(1);
|
options.plain_symlinks_table = state.range(1);
|
||||||
|
|
||||||
worker_group wg("writer", 4);
|
worker_group wg("writer", 4);
|
||||||
|
progress prog([](const progress&, bool) {}, 1000);
|
||||||
|
|
||||||
std::ostringstream logss;
|
std::ostringstream logss;
|
||||||
stream_logger lgr(logss); // TODO: mock
|
stream_logger lgr(logss); // TODO: mock
|
||||||
lgr.set_policy<prod_logger_policy>();
|
lgr.set_policy<prod_logger_policy>();
|
||||||
|
|
||||||
scanner s(lgr, wg, cfg, entry_factory::create(),
|
auto sf = std::make_shared<segmenter_factory>(lgr, prog, cfg);
|
||||||
|
|
||||||
|
scanner s(lgr, wg, sf, entry_factory::create(),
|
||||||
test::os_access_mock::create_test_instance(),
|
test::os_access_mock::create_test_instance(),
|
||||||
std::make_shared<test::script_mock>(), options);
|
std::make_shared<test::script_mock>(), options);
|
||||||
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
progress prog([](const progress&, bool) {}, 1000);
|
|
||||||
|
|
||||||
block_compressor bc("null");
|
block_compressor bc("null");
|
||||||
filesystem_writer fsw(oss, lgr, wg, prog, bc, bc);
|
filesystem_writer fsw(oss, lgr, wg, prog, bc, bc);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user