mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 03:49:44 -04:00
Category-dependent block compression
This commit is contained in:
parent
92226a73bd
commit
3f0d7c14fd
@ -354,6 +354,7 @@ list(
|
||||
LIBDWARFS_SRC
|
||||
src/dwarfs/block_cache.cpp
|
||||
src/dwarfs/block_compressor.cpp
|
||||
src/dwarfs/block_compressor_parser.cpp
|
||||
src/dwarfs/block_manager.cpp
|
||||
src/dwarfs/block_range.cpp
|
||||
src/dwarfs/builtin_script.cpp
|
||||
|
@ -65,6 +65,8 @@ class block_compressor {
|
||||
|
||||
// Returns the compression type reported by the underlying implementation object.
compression_type type() const { return impl_->type(); }
|
||||
|
||||
// Returns the textual description produced by the underlying implementation object.
std::string describe() const { return impl_->describe(); }
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
@ -77,6 +79,7 @@ class block_compressor {
|
||||
compress(std::vector<uint8_t>&& data) const = 0;
|
||||
|
||||
// Reports the compression_type of the concrete implementation (pure virtual).
virtual compression_type type() const = 0;
|
||||
// Returns a textual description of the concrete implementation (pure virtual).
virtual std::string describe() const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
37
include/dwarfs/block_compressor_parser.h
Normal file
37
include/dwarfs/block_compressor_parser.h
Normal file
@ -0,0 +1,37 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
struct block_compressor_parser {
|
||||
public:
|
||||
block_compressor parse(std::string_view arg) const;
|
||||
std::string to_string(block_compressor const& opts) const;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
@ -41,22 +41,22 @@ class contextual_option {
|
||||
using policy_type = Policy;
|
||||
using context_argument_type = typename policy_type::ContextArgumentType;
|
||||
using context_type = typename policy_type::ContextType;
|
||||
using option_type = typename policy_type::OptionType;
|
||||
using value_type = typename policy_type::ValueType;
|
||||
|
||||
template <typename OptionType, typename ContextParser, typename OptionParser>
|
||||
friend class contextual_option_parser;
|
||||
|
||||
contextual_option() = default;
|
||||
explicit contextual_option(option_type const& def)
|
||||
explicit contextual_option(value_type const& def)
|
||||
: default_{def} {}
|
||||
|
||||
void set_default(option_type const& val) { default_ = val; }
|
||||
void set_default(value_type const& val) { default_ = val; }
|
||||
|
||||
void add_contextual(context_type const& ctx, option_type const& val) {
|
||||
contextual_[ctx] = val;
|
||||
bool add_contextual(context_type const& ctx, value_type const& val) {
|
||||
return contextual_.emplace(ctx, val).second;
|
||||
}
|
||||
|
||||
std::optional<option_type>
|
||||
std::optional<value_type>
|
||||
get_optional(context_argument_type const& arg) const {
|
||||
if constexpr (std::is_same_v<context_type, context_argument_type>) {
|
||||
return get_optional_impl(arg);
|
||||
@ -65,7 +65,7 @@ class contextual_option {
|
||||
}
|
||||
}
|
||||
|
||||
option_type get(context_argument_type const& arg) const {
|
||||
value_type get(context_argument_type const& arg) const {
|
||||
if constexpr (std::is_same_v<context_type, context_argument_type>) {
|
||||
return get_impl(arg);
|
||||
} else {
|
||||
@ -73,9 +73,9 @@ class contextual_option {
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<option_type> get_optional() const { return default_; }
|
||||
std::optional<value_type> get_optional() const { return default_; }
|
||||
|
||||
option_type get() const { return default_.value(); }
|
||||
value_type get() const { return default_.value(); }
|
||||
|
||||
template <typename T>
|
||||
bool any_is(T&& pred) const {
|
||||
@ -88,22 +88,22 @@ class contextual_option {
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<option_type> get_optional_impl(context_type const& ctx) const {
|
||||
std::optional<value_type> get_optional_impl(context_type const& ctx) const {
|
||||
if (auto it = contextual_.find(ctx); it != contextual_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return default_;
|
||||
}
|
||||
|
||||
option_type get_impl(context_type const& ctx) const {
|
||||
value_type get_impl(context_type const& ctx) const {
|
||||
if (auto it = contextual_.find(ctx); it != contextual_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return default_.value();
|
||||
}
|
||||
|
||||
std::optional<option_type> default_;
|
||||
std::unordered_map<context_type, option_type> contextual_;
|
||||
std::optional<value_type> default_;
|
||||
std::unordered_map<context_type, value_type> contextual_;
|
||||
};
|
||||
|
||||
template <typename OptionType, typename ContextParser, typename OptionParser>
|
||||
@ -132,10 +132,10 @@ class contextual_option_parser {
|
||||
std::invoke_result_t<decltype(&ContextParser::parse),
|
||||
ContextParser, decltype(ctx)>,
|
||||
typename option_type::context_type>) {
|
||||
opt_.add_contextual(cp_.parse(ctx), val);
|
||||
add_contextual(cp_.parse(ctx), val);
|
||||
} else {
|
||||
for (auto c : cp_.parse(ctx)) {
|
||||
opt_.add_contextual(c, val);
|
||||
add_contextual(c, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -172,6 +172,14 @@ class contextual_option_parser {
|
||||
}
|
||||
|
||||
private:
|
||||
// Registers `val` for context `ctx` on the wrapped option.
//
// Throws std::runtime_error naming the context and the option when the
// option's add_contextual() reports that nothing was added.
void add_contextual(typename option_type::context_type const& ctx,
                    typename option_type::value_type const& val) const {
  if (opt_.add_contextual(ctx, val)) {
    // Value was stored for this context; nothing more to do.
    return;
  }

  throw std::runtime_error(fmt::format(
      "duplicate context '{}' for option '{}'", cp_.to_string(ctx), name_));
}
|
||||
|
||||
OptionType& opt_;
|
||||
ContextParser const& cp_;
|
||||
OptionParser const& op_;
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "dwarfs/options.h"
|
||||
|
@ -44,7 +44,7 @@ template <typename T>
|
||||
struct categorized_option_policy {
|
||||
using ContextArgumentType = fragment_category;
|
||||
using ContextType = fragment_category::value_type;
|
||||
using OptionType = T;
|
||||
using ValueType = T;
|
||||
|
||||
static ContextType context_from_arg(ContextArgumentType const& arg) {
|
||||
return arg.value();
|
||||
@ -53,9 +53,9 @@ struct categorized_option_policy {
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename OptionType>
|
||||
template <typename ValueType>
|
||||
using categorized_option =
|
||||
contextual_option<detail::categorized_option_policy<OptionType>>;
|
||||
contextual_option<detail::categorized_option_policy<ValueType>>;
|
||||
|
||||
// Three-state selector. Enumerator order (and therefore the underlying
// values 0, 1, 2) must stay as listed.
// NOTE(review): the name suggests this selects memory-locking behaviour --
// confirm at the use sites.
enum class mlock_mode {
  NONE,
  TRY,
  MUST,
};
|
||||
|
||||
|
35
src/dwarfs/block_compressor_parser.cpp
Normal file
35
src/dwarfs/block_compressor_parser.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "dwarfs/block_compressor_parser.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
// Builds a block_compressor from the specification text in `arg`.
block_compressor block_compressor_parser::parse(std::string_view arg) const {
  // The view is copied into an owning std::string before it is handed to
  // the block_compressor constructor.
  std::string spec(arg);
  return block_compressor(spec);
}
|
||||
|
||||
std::string
|
||||
block_compressor_parser::to_string(block_compressor const& bc) const {
|
||||
return bc.describe();
|
||||
}
|
||||
|
||||
} // namespace dwarfs
|
@ -33,7 +33,7 @@ std::vector<fragment_category::value_type>
|
||||
category_parser::parse(std::string_view arg) const {
|
||||
if (!catmgr_) {
|
||||
throw std::runtime_error(
|
||||
"cannot configure category-specific options without any categories");
|
||||
"cannot configure category-specific options without any categorizers");
|
||||
}
|
||||
|
||||
std::vector<fragment_category::value_type> rv;
|
||||
|
@ -75,6 +75,10 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
|
||||
// Reports BROTLI as this implementation's compression type.
compression_type type() const override { return compression_type::BROTLI; }
|
||||
|
||||
std::string describe() const override {
|
||||
return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t const quality_;
|
||||
uint32_t const window_bits_;
|
||||
|
@ -38,6 +38,8 @@ struct lz4_compression_policy {
|
||||
reinterpret_cast<const char*>(src), reinterpret_cast<char*>(dest),
|
||||
folly::to<int>(size), folly::to<int>(destsize)));
|
||||
}
|
||||
|
||||
// Description for the plain LZ4 policy; the level argument is ignored and
// the result is always "lz4".
static std::string describe(int /*level*/) { return "lz4"; }
|
||||
};
|
||||
|
||||
struct lz4hc_compression_policy {
|
||||
@ -47,6 +49,10 @@ struct lz4hc_compression_policy {
|
||||
reinterpret_cast<const char*>(src), reinterpret_cast<char*>(dest),
|
||||
folly::to<int>(size), folly::to<int>(destsize), level));
|
||||
}
|
||||
|
||||
static std::string describe(int level) {
|
||||
return fmt::format("lz4hc [level={}]", level);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Policy>
|
||||
@ -84,6 +90,8 @@ class lz4_block_compressor final : public block_compressor::impl {
|
||||
|
||||
// Reports LZ4 as the compression type; the value does not depend on Policy.
compression_type type() const override { return compression_type::LZ4; }
|
||||
|
||||
// Delegates to Policy::describe so each policy supplies its own text for
// the stored level_.
std::string describe() const override { return Policy::describe(level_); }
|
||||
|
||||
private:
|
||||
const int level_;
|
||||
};
|
||||
|
@ -71,6 +71,8 @@ class lzma_block_compressor final : public block_compressor::impl {
|
||||
|
||||
// Reports LZMA as this implementation's compression type.
compression_type type() const override { return compression_type::LZMA; }
|
||||
|
||||
// Returns the description string held in description_, which is set in the
// constructor's member initializer list.
std::string describe() const override { return description_; }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>
|
||||
compress(const std::vector<uint8_t>& data, const lzma_filter* filters) const;
|
||||
@ -107,11 +109,16 @@ class lzma_block_compressor final : public block_compressor::impl {
|
||||
|
||||
lzma_options_lzma opt_lzma_;
|
||||
std::array<lzma_filter, 3> filters_;
|
||||
std::string description_;
|
||||
};
|
||||
|
||||
lzma_block_compressor::lzma_block_compressor(unsigned level, bool extreme,
|
||||
const std::string& binary_mode,
|
||||
unsigned dict_size) {
|
||||
unsigned dict_size)
|
||||
: description_{
|
||||
fmt::format("lzma [level={}, dict_size={}{}{}]", level, dict_size,
|
||||
extreme ? ", extreme" : "",
|
||||
binary_mode.empty() ? "" : ", binary=" + binary_mode)} {
|
||||
if (lzma_lzma_preset(&opt_lzma_, get_preset(level, extreme))) {
|
||||
DWARFS_THROW(runtime_error, "unsupported preset, possibly a bug");
|
||||
}
|
||||
|
@ -47,6 +47,8 @@ class null_block_compressor final : public block_compressor::impl {
|
||||
}
|
||||
|
||||
// Reports NONE as this implementation's compression type.
compression_type type() const override { return compression_type::NONE; }
|
||||
|
||||
// Always describes this compressor as "null".
std::string describe() const override { return "null"; }
|
||||
};
|
||||
|
||||
class null_block_decompressor final : public block_decompressor::impl {
|
||||
|
@ -63,6 +63,10 @@ class zstd_block_compressor final : public block_compressor::impl {
|
||||
|
||||
// Reports ZSTD as this implementation's compression type.
compression_type type() const override { return compression_type::ZSTD; }
|
||||
|
||||
std::string describe() const override {
|
||||
return fmt::format("zstd [level={}]", level_);
|
||||
}
|
||||
|
||||
private:
|
||||
class scoped_context;
|
||||
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/block_compressor_parser.h"
|
||||
#include "dwarfs/block_manager.h"
|
||||
#include "dwarfs/builtin_script.h"
|
||||
#include "dwarfs/categorizer.h"
|
||||
@ -275,13 +276,14 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
|
||||
block_manager::config cfg;
|
||||
sys_string path_str, output_str;
|
||||
std::string memory_limit, script_arg, compression, header, schema_compression,
|
||||
std::string memory_limit, script_arg, header, schema_compression,
|
||||
metadata_compression, log_level_str, timestamp, time_resolution,
|
||||
progress_mode, recompress_opts, pack_metadata, file_hash_algo,
|
||||
debug_filter, max_similarity_size, input_list_str, chmod_str,
|
||||
categorizer_list_str;
|
||||
std::vector<sys_string> filter;
|
||||
std::vector<std::string> order, max_lookback_blocks, window_size, window_step;
|
||||
std::vector<std::string> order, max_lookback_blocks, window_size, window_step,
|
||||
compression;
|
||||
size_t num_workers, num_scanner_workers;
|
||||
bool no_progress = false, remove_header = false, no_section_index = false,
|
||||
force_overwrite = false;
|
||||
@ -292,11 +294,12 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
integral_value_parser<size_t> max_lookback_parser;
|
||||
integral_value_parser<unsigned> window_size_parser(6, 24);
|
||||
integral_value_parser<unsigned> window_step_parser(0, 8);
|
||||
fragment_order_parser order_parser;
|
||||
block_compressor_parser compressor_parser;
|
||||
|
||||
scanner_options options;
|
||||
|
||||
auto order_desc =
|
||||
"inode fragments order (" + fragment_order_parser::choices() + ")";
|
||||
auto order_desc = "inode fragments order (" + order_parser.choices() + ")";
|
||||
|
||||
auto progress_desc = "progress mode (" +
|
||||
(from(progress_modes) | get<0>() | unsplit(", ")) + ")";
|
||||
@ -430,7 +433,8 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
po::options_description compressor_opts("Compressor options");
|
||||
compressor_opts.add_options()
|
||||
("compression,C",
|
||||
po::value<std::string>(&compression),
|
||||
// po::value<std::string>(&compression), // TODO
|
||||
po::value<std::vector<std::string>>(&compression)->multitoken(),
|
||||
"block compression algorithm")
|
||||
("schema-compression",
|
||||
po::value<std::string>(&schema_compression),
|
||||
@ -590,7 +594,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
}
|
||||
|
||||
if (!vm.count("compression")) {
|
||||
compression = defaults.data_compression;
|
||||
compression.push_back(std::string(defaults.data_compression));
|
||||
}
|
||||
|
||||
if (!vm.count("schema-compression")) {
|
||||
@ -913,13 +917,14 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
|
||||
progress prog(std::move(updater), interval_ms);
|
||||
|
||||
block_compressor bc(compression);
|
||||
block_compressor bc(compression.front()); // TODO
|
||||
block_compressor schema_bc(schema_compression);
|
||||
block_compressor metadata_bc(metadata_compression);
|
||||
|
||||
auto min_memory_req = num_workers * (UINT64_C(1) << cfg.block_size_bits);
|
||||
|
||||
if (mem_limit < min_memory_req && compression != "null") {
|
||||
// TODO:
|
||||
if (mem_limit < min_memory_req /* && compression != "null" */) {
|
||||
LOG_WARN << "low memory limit (" << size_with_unit(mem_limit) << "), need "
|
||||
<< size_with_unit(min_memory_req) << " to efficiently compress "
|
||||
<< size_with_unit(UINT64_C(1) << cfg.block_size_bits)
|
||||
@ -951,6 +956,77 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
os = std::make_unique<std::ostringstream>();
|
||||
}
|
||||
|
||||
// TODO: the whole re-writing thing will be a bit weird in combination
|
||||
// with categories; we'd likely require a "category"-section to be
|
||||
// present (which we'll also require for bit-identical mode)
|
||||
|
||||
if (!categorizer_list_str.empty()) {
|
||||
std::vector<std::string> categorizer_list;
|
||||
boost::split(categorizer_list, categorizer_list_str, boost::is_any_of(","));
|
||||
|
||||
options.inode.categorizer_mgr = std::make_shared<categorizer_manager>(lgr);
|
||||
|
||||
for (auto const& name : categorizer_list) {
|
||||
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));
|
||||
}
|
||||
}
|
||||
|
||||
category_parser cp(options.inode.categorizer_mgr);
|
||||
|
||||
try {
|
||||
contextual_option_parser cop("--order", options.inode.fragment_order, cp,
|
||||
order_parser);
|
||||
cop.parse(order);
|
||||
cop.dump(std::cerr);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<size_t> max_lookback_opt;
|
||||
contextual_option_parser cop("--max-lookback-blocks", max_lookback_opt, cp,
|
||||
max_lookback_parser);
|
||||
cop.parse(max_lookback_blocks);
|
||||
cop.dump(std::cerr);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<unsigned> window_size_opt;
|
||||
contextual_option_parser cop("--window-size", window_size_opt, cp,
|
||||
window_size_parser);
|
||||
cop.parse(window_size);
|
||||
cop.dump(std::cerr);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<unsigned> window_step_opt;
|
||||
contextual_option_parser cop("--window-step", window_step_opt, cp,
|
||||
window_step_parser);
|
||||
cop.parse(window_step);
|
||||
cop.dump(std::cerr);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<block_compressor> compression_opt;
|
||||
contextual_option_parser cop("--compression", compression_opt, cp,
|
||||
compressor_parser);
|
||||
cop.parse(compression);
|
||||
cop.dump(std::cerr);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
filesystem_writer fsw(*os, lgr, wg_compress, prog, bc, schema_bc, metadata_bc,
|
||||
fswopts, header_ifs.get());
|
||||
|
||||
@ -962,61 +1038,6 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
fsw, rw_opts);
|
||||
wg_compress.wait();
|
||||
} else {
|
||||
if (!categorizer_list_str.empty()) {
|
||||
std::vector<std::string> categorizer_list;
|
||||
boost::split(categorizer_list, categorizer_list_str,
|
||||
boost::is_any_of(","));
|
||||
|
||||
options.inode.categorizer_mgr =
|
||||
std::make_shared<categorizer_manager>(lgr);
|
||||
|
||||
for (auto const& name : categorizer_list) {
|
||||
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));
|
||||
}
|
||||
}
|
||||
|
||||
category_parser cp(options.inode.categorizer_mgr);
|
||||
|
||||
try {
|
||||
fragment_order_parser fop;
|
||||
contextual_option_parser("--order", options.inode.fragment_order, cp,
|
||||
fop)
|
||||
.parse(order);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<size_t> max_lookback_opt;
|
||||
contextual_option_parser("--max-lookback-blocks", max_lookback_opt, cp,
|
||||
max_lookback_parser)
|
||||
.parse(max_lookback_blocks);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<unsigned> window_size_opt;
|
||||
contextual_option_parser("--window-size", window_size_opt, cp,
|
||||
window_size_parser)
|
||||
.parse(window_size);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
categorized_option<unsigned> window_step_opt;
|
||||
contextual_option_parser("--window-step", window_step_opt, cp,
|
||||
window_step_parser)
|
||||
.parse(window_step);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << e.what();
|
||||
return 1;
|
||||
}
|
||||
|
||||
scanner s(lgr, wg_scanner, cfg, entry_factory::create(),
|
||||
std::make_shared<os_access_generic>(), std::move(script),
|
||||
options);
|
||||
|
Loading…
x
Reference in New Issue
Block a user