wip: more refactoring

This commit is contained in:
Marcus Holland-Moritz 2024-08-04 17:14:24 +02:00
parent e81c2de507
commit c1fbaf7789
35 changed files with 393 additions and 369 deletions

View File

@ -612,8 +612,6 @@ list(APPEND
src/dwarfs/error.cpp
src/dwarfs/file_access_generic.cpp
src/dwarfs/file_stat.cpp
src/dwarfs/filesystem_writer.cpp
src/dwarfs/fragment_category.cpp
src/dwarfs/fstypes.cpp
src/dwarfs/history.cpp
src/dwarfs/library_dependencies.cpp
@ -662,10 +660,12 @@ list(APPEND
src/dwarfs/writer/console_writer.cpp
src/dwarfs/writer/entry_factory.cpp
src/dwarfs/writer/filesystem_block_category_resolver.cpp
src/dwarfs/writer/filesystem_writer_factory.cpp
src/dwarfs/writer/filesystem_writer.cpp
src/dwarfs/writer/filter_debug.cpp
src/dwarfs/writer/fragment_category.cpp
src/dwarfs/writer/fragment_order_parser.cpp
src/dwarfs/writer/inode_fragments.cpp
src/dwarfs/writer/rewrite_filesystem.cpp
src/dwarfs/writer/rule_based_entry_filter.cpp
src/dwarfs/writer/scanner.cpp
src/dwarfs/writer/segmenter.cpp

View File

@ -45,8 +45,6 @@ namespace dwarfs {
struct vfs_stat;
class category_resolver;
class filesystem_writer;
class history;
class logger;
class mmif;
@ -57,6 +55,12 @@ namespace reader {
struct iovec_read_buf;
namespace internal {
class filesystem_parser;
} // namespace internal
class filesystem_v2 {
public:
filesystem_v2() = default;
@ -303,9 +307,12 @@ class filesystem_v2 {
return impl_->get_all_gids();
}
void rewrite(filesystem_writer& writer, category_resolver const& cat_resolver,
rewrite_options const& opts) const {
return impl_->rewrite(writer, cat_resolver, opts);
std::shared_ptr<internal::filesystem_parser> get_parser() const {
return impl_->get_parser();
}
std::optional<std::string> get_block_category(size_t block_number) const {
return impl_->get_block_category(block_number);
}
class impl {
@ -386,9 +393,9 @@ class filesystem_v2 {
virtual std::vector<std::string> get_all_block_categories() const = 0;
virtual std::vector<file_stat::uid_type> get_all_uids() const = 0;
virtual std::vector<file_stat::gid_type> get_all_gids() const = 0;
virtual void
rewrite(filesystem_writer& writer, category_resolver const& cat_resolver,
rewrite_options const& opts) const = 0;
virtual std::shared_ptr<internal::filesystem_parser> get_parser() const = 0;
virtual std::optional<std::string>
get_block_category(size_t block_number) const = 0;
};
private:

View File

@ -65,6 +65,10 @@ class filesystem_parser {
bool has_checksums() const;
bool has_index() const;
size_t filesystem_size() const;
std::span<uint8_t const>
section_data(dwarfs::internal::fs_section const& s) const;
private:
void find_index();

View File

@ -21,8 +21,8 @@
#pragma once
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/contextual_option.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs::writer {

View File

@ -31,7 +31,7 @@
#include <span>
#include <string_view>
#include <dwarfs/category_resolver.h>
#include <dwarfs/writer/category_resolver.h>
#include <dwarfs/writer/inode_fragments.h>
namespace boost::program_options {

View File

@ -24,14 +24,12 @@
#include <memory>
#include <vector>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs {
namespace dwarfs::writer {
class category_resolver;
namespace writer {
class category_parser {
public:
category_parser(std::shared_ptr<category_resolver const> resolver);
@ -43,6 +41,4 @@ class category_parser {
std::shared_ptr<category_resolver const> resolver_;
};
} // namespace writer
} // namespace dwarfs
} // namespace dwarfs::writer

View File

@ -24,9 +24,9 @@
#include <optional>
#include <string_view>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs {
namespace dwarfs::writer {
class category_resolver {
public:
@ -38,4 +38,4 @@ class category_resolver {
category_value(std::string_view name) const = 0;
};
} // namespace dwarfs
} // namespace dwarfs::writer

View File

@ -25,7 +25,7 @@
#include <unordered_map>
#include <vector>
#include <dwarfs/category_resolver.h>
#include <dwarfs/writer/category_resolver.h>
namespace dwarfs::writer {

View File

@ -21,11 +21,21 @@
#pragma once
#include <iosfwd>
#include <dwarfs/block_compressor.h>
#include <dwarfs/fragment_category.h>
#include <dwarfs/options.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs {
class logger;
class thread_pool;
namespace writer {
class writer_progress;
namespace internal {
class filesystem_writer_detail;
@ -34,8 +44,12 @@ class filesystem_writer_detail;
class filesystem_writer {
public:
explicit filesystem_writer(
std::unique_ptr<internal::filesystem_writer_detail> impl);
filesystem_writer(
std::ostream& os, logger& lgr, thread_pool& pool, writer_progress& prog,
block_compressor const& schema_bc, block_compressor const& metadata_bc,
block_compressor const& history_bc,
filesystem_writer_options const& options = filesystem_writer_options(),
std::istream* header = nullptr);
~filesystem_writer();
filesystem_writer(filesystem_writer&&);
@ -51,4 +65,6 @@ class filesystem_writer {
std::unique_ptr<internal::filesystem_writer_detail> impl_;
};
} // namespace writer
} // namespace dwarfs

View File

@ -29,7 +29,7 @@
#include <fmt/format.h>
namespace dwarfs {
namespace dwarfs::writer {
class fragment_category {
public:
@ -116,12 +116,14 @@ operator<<(std::ostream& os, fragment_category const& cat) {
return os;
}
} // namespace dwarfs
} // namespace dwarfs::writer
template <>
struct fmt::formatter<dwarfs::fragment_category> : formatter<std::string> {
struct fmt::formatter<dwarfs::writer::fragment_category>
: formatter<std::string> {
template <typename FormatContext>
auto format(dwarfs::fragment_category const& cat, FormatContext& ctx) const {
auto format(dwarfs::writer::fragment_category const& cat,
FormatContext& ctx) const {
if (cat) {
if (cat.has_subcategory()) {
return formatter<std::string>::format(
@ -139,8 +141,8 @@ struct fmt::formatter<dwarfs::fragment_category> : formatter<std::string> {
namespace std {
template <>
struct hash<dwarfs::fragment_category> {
std::size_t operator()(dwarfs::fragment_category const& k) const {
struct hash<dwarfs::writer::fragment_category> {
std::size_t operator()(dwarfs::writer::fragment_category const& k) const {
return k.hash();
}
};

View File

@ -28,9 +28,9 @@
#include <string>
#include <unordered_map>
#include <dwarfs/fragment_category.h>
#include <dwarfs/small_vector.h>
#include <dwarfs/types.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs::writer {

View File

@ -25,7 +25,7 @@
#include <string_view>
#include <vector>
namespace dwarfs::internal {
namespace dwarfs::writer::internal {
class block_data {
public:
@ -51,4 +51,4 @@ class block_data {
std::vector<uint8_t> vec_;
};
} // namespace dwarfs::internal
} // namespace dwarfs::writer::internal

View File

@ -25,8 +25,8 @@
#include <optional>
#include <vector>
#include <dwarfs/fragment_category.h>
#include <dwarfs/gen-cpp2/metadata_types.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs::writer::internal {

View File

@ -32,16 +32,21 @@
#include <dwarfs/block_compressor.h>
#include <dwarfs/compression_constraints.h>
#include <dwarfs/fragment_category.h>
#include <dwarfs/fstypes.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs {
namespace internal {
class block_data;
class fs_section;
}
namespace writer::internal {
class block_data;
class filesystem_writer_detail {
public:
virtual ~filesystem_writer_detail() = default;
@ -64,17 +69,14 @@ class filesystem_writer_detail {
virtual void
configure_rewrite(size_t filesystem_size, size_t block_count) = 0;
virtual void copy_header(std::span<uint8_t const> header) = 0;
virtual void write_block(fragment_category cat,
std::shared_ptr<dwarfs::internal::block_data>&& data,
physical_block_cb_type physical_block_cb,
std::optional<std::string> meta = std::nullopt) = 0;
virtual void
write_block(fragment_category cat, std::shared_ptr<block_data>&& data,
physical_block_cb_type physical_block_cb,
std::optional<std::string> meta = std::nullopt) = 0;
virtual void finish_category(fragment_category cat) = 0;
virtual void write_metadata_v2_schema(
std::shared_ptr<dwarfs::internal::block_data>&& data) = 0;
virtual void
write_metadata_v2(std::shared_ptr<dwarfs::internal::block_data>&& data) = 0;
virtual void
write_history(std::shared_ptr<dwarfs::internal::block_data>&& data) = 0;
virtual void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) = 0;
virtual void write_metadata_v2(std::shared_ptr<block_data>&& data) = 0;
virtual void write_history(std::shared_ptr<block_data>&& data) = 0;
virtual void check_block_compression(
compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
@ -88,6 +90,6 @@ class filesystem_writer_detail {
virtual size_t size() const = 0;
};
} // namespace internal
} // namespace writer::internal
} // namespace dwarfs

View File

@ -24,7 +24,7 @@
#include <optional>
#include <span>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
#include <dwarfs/writer/internal/nilsimsa.h>
#include <dwarfs/writer/internal/similarity_ordering.h>

View File

@ -29,7 +29,7 @@
#include <utility>
#include <vector>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
#include <dwarfs/writer/internal/inode.h>

View File

@ -21,32 +21,28 @@
#pragma once
#include <iosfwd>
#include <dwarfs/filesystem_writer.h>
#include <dwarfs/options.h>
namespace dwarfs {
class block_compressor;
class logger;
class thread_pool;
// TODO: move to writer namespace
struct rewrite_options;
namespace reader {
class filesystem_v2;
} // namespace reader
namespace writer {
class writer_progress;
class category_resolver;
class filesystem_writer;
class filesystem_writer_factory {
public:
static filesystem_writer
create(std::ostream& os, logger& lgr, thread_pool& pool,
writer_progress& prog, block_compressor const& schema_bc,
block_compressor const& metadata_bc,
block_compressor const& history_bc,
filesystem_writer_options const& options = filesystem_writer_options(),
std::istream* header = nullptr);
};
void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
filesystem_writer& writer,
category_resolver const& cat_resolver,
rewrite_options const& opts);
} // namespace writer
} // namespace dwarfs

View File

@ -32,18 +32,13 @@ struct compression_constraints;
class logger;
namespace internal {
class block_data;
}
namespace writer {
class writer_progress;
namespace internal {
class block_data;
class block_manager;
class chunkable;
@ -61,7 +56,7 @@ class segmenter {
};
using block_ready_cb = std::function<void(
std::shared_ptr<dwarfs::internal::block_data>, size_t logical_block_num)>;
std::shared_ptr<internal::block_data>, size_t logical_block_num)>;
segmenter(logger& lgr, writer_progress& prog,
std::shared_ptr<internal::block_manager> blkmgr, config const& cfg,

View File

@ -1,45 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <dwarfs/filesystem_writer.h>
#include <dwarfs/internal/filesystem_writer_detail.h>
namespace dwarfs {
// Wraps an already constructed implementation object; ownership of `impl`
// is transferred into `impl_`.
filesystem_writer::filesystem_writer(
std::unique_ptr<internal::filesystem_writer_detail> impl)
: impl_(std::move(impl)) {}
// The destructor and the move operations are defaulted out of line, in a
// translation unit that includes the filesystem_writer_detail header.
filesystem_writer::~filesystem_writer() = default;
filesystem_writer::filesystem_writer(filesystem_writer&&) = default;
filesystem_writer& filesystem_writer::operator=(filesystem_writer&&) = default;
// Hands the default block compressor over to the implementation object.
void filesystem_writer::add_default_compressor(block_compressor bc) {
  auto& detail = *impl_;
  detail.add_default_compressor(std::move(bc));
}
// Hands a block compressor for category `cat` over to the implementation
// object.
void filesystem_writer::add_category_compressor(
    fragment_category::value_type cat, block_compressor bc) {
  auto& detail = *impl_;
  detail.add_category_compressor(cat, std::move(bc));
}
} // namespace dwarfs

View File

@ -33,9 +33,7 @@
#include <fmt/format.h>
#include <dwarfs/block_compressor.h>
#include <dwarfs/category_resolver.h>
#include <dwarfs/error.h>
#include <dwarfs/filesystem_writer.h>
#include <dwarfs/fstypes.h>
#include <dwarfs/history.h>
#include <dwarfs/logger.h>
@ -46,14 +44,14 @@
#include <dwarfs/reader/filesystem_v2.h>
#include <dwarfs/util.h>
#include <dwarfs/internal/block_data.h>
#include <dwarfs/internal/filesystem_writer_detail.h>
#include <dwarfs/internal/fs_section.h>
#include <dwarfs/internal/worker_group.h>
#include <dwarfs/reader/internal/block_cache.h>
#include <dwarfs/reader/internal/filesystem_parser.h>
#include <dwarfs/reader/internal/inode_reader_v2.h>
#include <dwarfs/reader/internal/metadata_v2.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
namespace dwarfs::reader {
@ -288,9 +286,13 @@ class filesystem_ final : public filesystem_v2::impl {
std::vector<file_stat::gid_type> get_all_gids() const override {
return meta_.get_all_gids();
}
void
rewrite(filesystem_writer& fs_writer, category_resolver const& cat_resolver,
rewrite_options const& opts) const override;
// Creates a new section parser for this filesystem image.
//
// Every call constructs a fresh `filesystem_parser` from the memory mapping
// (`mm_`) and the image offset (`image_offset_`); the caller shares ownership
// of the returned object.
std::shared_ptr<filesystem_parser> get_parser() const override {
  // Construct directly as a shared_ptr: converting the result of
  // std::make_unique would need a second allocation for the control block.
  return std::make_shared<filesystem_parser>(mm_, image_offset_);
}
// Returns whatever the metadata object reports as the category of block
// `block_no`.
std::optional<std::string>
get_block_category(size_t block_no) const override {
  auto category = meta_.get_block_category(block_no);
  return category;
}
private:
filesystem_info const* get_info(fsinfo_options const& opts) const;
@ -505,169 +507,6 @@ filesystem_<LoggerPolicy>::filesystem_(
}
}
/**
 * Rewrites this filesystem image through `fs_writer`, section by section.
 *
 * A parser is created over the memory mapping (`mm_`) at `image_offset_`.
 * If `opts.recompress_block` is set, a first pass calls
 * `check_block_compression()` for every BLOCK section whose category can be
 * resolved. A second pass then walks all sections and either recompresses
 * them (`write_section`), copies them verbatim (`write_compressed_section`),
 * regenerates them (HISTORY) or drops them (SECTION_INDEX), and finally
 * flushes the writer.
 *
 * \param fs_writer     destination writer; accessed via `get_internal()`
 * \param cat_resolver  translates between category names and values
 * \param opts          rewrite options; this function reads
 *                      `recompress_block`, `recompress_categories`,
 *                      `recompress_categories_exclude`,
 *                      `recompress_metadata`, `enable_history`, `history`
 *                      and `command_line_arguments`
 */
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::rewrite(filesystem_writer& fs_writer,
category_resolver const& cat_resolver,
rewrite_options const& opts) const {
filesystem_parser parser(mm_, image_offset_);
auto& writer = fs_writer.get_internal();
// First pass (only when blocks may be recompressed): hand every categorized
// BLOCK section to the writer's compression check.
if (opts.recompress_block) {
size_t block_no{0};
parser.rewind();
while (auto s = parser.next_section()) {
if (s->type() == section_type::BLOCK) {
if (auto catstr = meta_.get_block_category(block_no)) {
if (auto cat = cat_resolver.category_value(catstr.value())) {
writer.check_block_compression(s->compression(), s->data(*mm_),
cat);
}
}
++block_no;
}
}
}
writer.configure_rewrite(mm_->size(), num_blocks());
// Carry over the image header if there is one.
if (header_) {
writer.copy_header(*header_);
}
// Index of the current BLOCK section in the second pass.
size_t block_no{0};
// Logs one line per section: what is done with it, its size, type, current
// compression, category (if any) and, when recompressing, the target
// compressor.
auto log_rewrite =
[&](bool compressing, const auto& s,
std::optional<fragment_category::value_type> const& cat) {
auto prefix = compressing ? "recompressing" : "copying";
std::string catinfo;
std::string compinfo;
if (cat) {
catinfo = fmt::format(", {}", cat_resolver.category_name(*cat));
}
if (compressing) {
compinfo = fmt::format(
" using '{}'", writer.get_compressor(s->type(), cat).describe());
}
LOG_VERBOSE << prefix << " " << size_with_unit(s->length()) << " "
<< get_section_name(s->type()) << " ("
<< get_compression_name(s->compression()) << catinfo << ")"
<< compinfo;
};
auto log_recompress =
[&](const auto& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) { log_rewrite(true, s, cat); };
// Logs and copies the section's stored bytes without recompressing them.
auto copy_compressed =
[&](const auto& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) {
log_rewrite(false, s, cat);
writer.write_compressed_section(*s, s->data(*mm_));
};
// True if the section is stored uncompressed and the compressor selected
// for it is of type NONE as well, i.e. recompressing would be pointless.
auto from_none_to_none =
[&](auto const& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) {
if (s->compression() == compression_type::NONE) {
auto& bc = writer.get_compressor(s->type(), cat);
if (bc.type() == compression_type::NONE) {
return true;
}
}
return false;
};
// Second pass: emit every section.
parser.rewind();
while (auto s = parser.next_section()) {
switch (s->type()) {
case section_type::BLOCK: {
std::optional<fragment_category::value_type> cat;
// NOTE(review): this flag starts out true even when
// opts.recompress_block is false, so such blocks still take the
// write_section() path below unless from_none_to_none() applies —
// confirm this is intended.
bool recompress_block{true};
if (opts.recompress_block) {
auto catstr = meta_.get_block_category(block_no);
if (catstr) {
cat = cat_resolver.category_value(catstr.value());
if (!cat) {
LOG_ERROR << "unknown category '" << catstr.value()
<< "' for block " << block_no;
}
// A non-empty category set acts as an include list, or as an exclude
// list when recompress_categories_exclude is set.
if (!opts.recompress_categories.empty()) {
bool is_in_set{opts.recompress_categories.count(catstr.value()) >
0};
recompress_block =
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
}
}
}
if (recompress_block && from_none_to_none(s, cat)) {
recompress_block = false;
}
if (recompress_block) {
log_recompress(s, cat);
writer.write_section(section_type::BLOCK, s->compression(),
s->data(*mm_), cat);
} else {
copy_compressed(s, cat);
}
++block_no;
} break;
case section_type::METADATA_V2_SCHEMA:
case section_type::METADATA_V2:
if (opts.recompress_metadata && !from_none_to_none(s)) {
log_recompress(s);
writer.write_section(s->type(), s->compression(), s->data(*mm_));
} else {
copy_compressed(s);
}
break;
case section_type::HISTORY:
// The history is either rebuilt from the existing history plus the
// current command line arguments, or left out of the output entirely.
if (opts.enable_history) {
history hist{opts.history};
hist.parse(history_.serialize());
hist.append(opts.command_line_arguments);
LOG_VERBOSE << "updating " << get_section_name(s->type()) << " ("
<< get_compression_name(s->compression())
<< "), compressing using '"
<< writer.get_compressor(s->type()).describe() << "'";
writer.write_history(std::make_shared<block_data>(hist.serialize()));
} else {
LOG_VERBOSE << "removing " << get_section_name(s->type());
}
break;
case section_type::SECTION_INDEX:
// this will be automatically added by the filesystem_writer
break;
default:
// verbatim copy everything else
copy_compressed(s);
break;
}
}
writer.flush();
}
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::check(filesystem_check_level level,
size_t num_threads) const {

View File

@ -189,6 +189,13 @@ bool filesystem_parser::has_checksums() const { return version_ >= 2; }
bool filesystem_parser::has_index() const { return !index_.empty(); }
size_t filesystem_parser::filesystem_size() const { return mm_->size(); }
// Returns the bytes of section `s` as a view into this parser's memory
// mapping.
std::span<uint8_t const>
filesystem_parser::section_data(fs_section const& s) const {
  auto& mapping = *mm_;
  return s.data(mapping);
}
void filesystem_parser::find_index() {
uint64_t index_pos;

View File

@ -23,8 +23,8 @@
#include <folly/String.h>
#include <dwarfs/category_resolver.h>
#include <dwarfs/writer/category_parser.h>
#include <dwarfs/writer/category_resolver.h>
namespace dwarfs::writer {

View File

@ -40,13 +40,13 @@
#include <dwarfs/thread_pool.h>
#include <dwarfs/util.h>
#include <dwarfs/writer/compression_metadata_requirements.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/writer_progress.h>
#include <dwarfs/internal/block_data.h>
#include <dwarfs/internal/filesystem_writer_detail.h>
#include <dwarfs/internal/fs_section.h>
#include <dwarfs/internal/worker_group.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
#include <dwarfs/writer/internal/multi_queue_block_merger.h>
#include <dwarfs/writer/internal/progress.h>
@ -1102,16 +1102,30 @@ void filesystem_writer_<LoggerPolicy>::write_section_index() {
} // namespace internal
filesystem_writer filesystem_writer_factory::create(
std::ostream& os, logger& lgr, thread_pool& pool, writer_progress& prog,
block_compressor const& schema_bc, block_compressor const& metadata_bc,
block_compressor const& history_bc,
filesystem_writer_options const& options, std::istream* header) {
return filesystem_writer{
make_unique_logging_object<internal::filesystem_writer_detail,
internal::filesystem_writer_, logger_policies>(
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
thread_pool& pool, writer_progress& prog,
block_compressor const& schema_bc,
block_compressor const& metadata_bc,
block_compressor const& history_bc,
filesystem_writer_options const& options,
std::istream* header)
: impl_{make_unique_logging_object<internal::filesystem_writer_detail,
internal::filesystem_writer_,
logger_policies>(
lgr, os, pool.get_worker_group(), prog.get_internal(), schema_bc,
metadata_bc, history_bc, options, header)};
metadata_bc, history_bc, options, header)} {}
// The destructor and the move operations are defaulted out of line, in a
// translation unit that includes the filesystem_writer_detail header.
filesystem_writer::~filesystem_writer() = default;
filesystem_writer::filesystem_writer(filesystem_writer&&) = default;
filesystem_writer& filesystem_writer::operator=(filesystem_writer&&) = default;
// Hands the default block compressor over to the implementation object.
void filesystem_writer::add_default_compressor(block_compressor bc) {
  auto& detail = *impl_;
  detail.add_default_compressor(std::move(bc));
}
// Hands a block compressor for category `cat` over to the implementation
// object.
void filesystem_writer::add_category_compressor(
    fragment_category::value_type cat, block_compressor bc) {
  auto& detail = *impl_;
  detail.add_category_compressor(cat, std::move(bc));
}
} // namespace dwarfs::writer

View File

@ -21,12 +21,12 @@
#include <folly/hash/Hash.h>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
namespace dwarfs {
namespace dwarfs::writer {
// Combines the category value and the subcategory into a single hash value.
size_t fragment_category::hash() const {
  auto const digest = folly::hash::hash_combine(value_, subcategory_);
  return digest;
}
} // namespace dwarfs
} // namespace dwarfs::writer

View File

@ -0,0 +1,202 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <dwarfs/history.h>
#include <dwarfs/logger.h>
#include <dwarfs/reader/filesystem_v2.h>
#include <dwarfs/writer/category_resolver.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/reader/internal/filesystem_parser.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
namespace dwarfs::writer {
/**
 * Rewrites the filesystem image `fs` through `fs_writer`, section by section.
 *
 * A section parser is obtained from `fs`. If `opts.recompress_block` is set,
 * a first pass calls `check_block_compression()` for every BLOCK section
 * whose category can be resolved. A second pass then walks all sections and
 * either recompresses them (`write_section`), copies them verbatim
 * (`write_compressed_section`), regenerates them (HISTORY) or drops them
 * (SECTION_INDEX), and finally flushes the writer.
 *
 * \param lgr           logger backing the LOG_VERBOSE / LOG_ERROR output
 * \param fs            source filesystem; provides the parser, the block
 *                      categories, the block count and the history
 * \param fs_writer     destination writer; accessed via `get_internal()`
 * \param cat_resolver  translates between category names and values
 * \param opts          rewrite options; this function reads
 *                      `recompress_block`, `recompress_categories`,
 *                      `recompress_categories_exclude`,
 *                      `recompress_metadata`, `enable_history`, `history`
 *                      and `command_line_arguments`
 */
void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
filesystem_writer& fs_writer,
category_resolver const& cat_resolver,
rewrite_options const& opts) {
// Presumably sets up the logging front end used by the LOG_* macros below
// (the macro is defined elsewhere).
LOG_PROXY(debug_logger_policy, lgr);
auto parser = fs.get_parser();
auto& writer = fs_writer.get_internal();
// First pass (only when blocks may be recompressed): hand every categorized
// BLOCK section to the writer's compression check.
if (opts.recompress_block) {
size_t block_no{0};
parser->rewind();
while (auto s = parser->next_section()) {
if (s->type() == section_type::BLOCK) {
if (auto catstr = fs.get_block_category(block_no)) {
if (auto cat = cat_resolver.category_value(catstr.value())) {
writer.check_block_compression(s->compression(),
parser->section_data(*s), cat);
}
}
++block_no;
}
}
}
writer.configure_rewrite(parser->filesystem_size(), fs.num_blocks());
// Carry over the image header if the parser reports one.
if (auto header = parser->header()) {
writer.copy_header(*header);
}
// Index of the current BLOCK section in the second pass.
size_t block_no{0};
// Logs one line per section: what is done with it, its size, type, current
// compression, category (if any) and, when recompressing, the target
// compressor.
auto log_rewrite =
[&](bool compressing, const auto& s,
std::optional<fragment_category::value_type> const& cat) {
auto prefix = compressing ? "recompressing" : "copying";
std::string catinfo;
std::string compinfo;
if (cat) {
catinfo = fmt::format(", {}", cat_resolver.category_name(*cat));
}
if (compressing) {
compinfo = fmt::format(
" using '{}'", writer.get_compressor(s->type(), cat).describe());
}
LOG_VERBOSE << prefix << " " << size_with_unit(s->length()) << " "
<< get_section_name(s->type()) << " ("
<< get_compression_name(s->compression()) << catinfo << ")"
<< compinfo;
};
auto log_recompress =
[&](const auto& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) { log_rewrite(true, s, cat); };
// Logs and copies the section's stored bytes without recompressing them.
auto copy_compressed =
[&](const auto& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) {
log_rewrite(false, s, cat);
writer.write_compressed_section(*s, parser->section_data(*s));
};
// True if the section is stored uncompressed and the compressor selected
// for it is of type NONE as well, i.e. recompressing would be pointless.
auto from_none_to_none =
[&](auto const& s,
std::optional<fragment_category::value_type> const& cat =
std::nullopt) {
if (s->compression() == compression_type::NONE) {
auto& bc = writer.get_compressor(s->type(), cat);
if (bc.type() == compression_type::NONE) {
return true;
}
}
return false;
};
// Second pass: emit every section.
parser->rewind();
while (auto s = parser->next_section()) {
switch (s->type()) {
case section_type::BLOCK: {
std::optional<fragment_category::value_type> cat;
// NOTE(review): this flag starts out true even when
// opts.recompress_block is false, so such blocks still take the
// write_section() path below unless from_none_to_none() applies —
// confirm this is intended.
bool recompress_block{true};
if (opts.recompress_block) {
auto catstr = fs.get_block_category(block_no);
if (catstr) {
cat = cat_resolver.category_value(catstr.value());
if (!cat) {
LOG_ERROR << "unknown category '" << catstr.value()
<< "' for block " << block_no;
}
// A non-empty category set acts as an include list, or as an exclude
// list when recompress_categories_exclude is set.
if (!opts.recompress_categories.empty()) {
bool is_in_set{opts.recompress_categories.count(catstr.value()) >
0};
recompress_block =
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
}
}
}
if (recompress_block && from_none_to_none(s, cat)) {
recompress_block = false;
}
if (recompress_block) {
log_recompress(s, cat);
writer.write_section(section_type::BLOCK, s->compression(),
parser->section_data(*s), cat);
} else {
copy_compressed(s, cat);
}
++block_no;
} break;
case section_type::METADATA_V2_SCHEMA:
case section_type::METADATA_V2:
if (opts.recompress_metadata && !from_none_to_none(s)) {
log_recompress(s);
writer.write_section(s->type(), s->compression(),
parser->section_data(*s));
} else {
copy_compressed(s);
}
break;
case section_type::HISTORY:
// The history is either rebuilt from the existing history plus the
// current command line arguments, or left out of the output entirely.
if (opts.enable_history) {
history hist{opts.history};
hist.parse(fs.get_history().serialize());
hist.append(opts.command_line_arguments);
LOG_VERBOSE << "updating " << get_section_name(s->type()) << " ("
<< get_compression_name(s->compression())
<< "), compressing using '"
<< writer.get_compressor(s->type()).describe() << "'";
writer.write_history(
std::make_shared<internal::block_data>(hist.serialize()));
} else {
LOG_VERBOSE << "removing " << get_section_name(s->type());
}
break;
case section_type::SECTION_INDEX:
// this will be automatically added by the filesystem_writer
break;
default:
// verbatim copy everything else
copy_compressed(s);
break;
}
}
writer.flush();
}
} // namespace dwarfs::writer

View File

@ -40,7 +40,6 @@
#include <dwarfs/error.h>
#include <dwarfs/file_access.h>
#include <dwarfs/filesystem_writer.h>
#include <dwarfs/history.h>
#include <dwarfs/logger.h>
#include <dwarfs/mmif.h>
@ -53,18 +52,19 @@
#include <dwarfs/writer/entry_factory.h>
#include <dwarfs/writer/entry_filter.h>
#include <dwarfs/writer/entry_transformer.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/scanner.h>
#include <dwarfs/writer/segmenter_factory.h>
#include <dwarfs/writer/writer_progress.h>
#include <dwarfs/internal/block_data.h>
#include <dwarfs/internal/features.h>
#include <dwarfs/internal/filesystem_writer_detail.h>
#include <dwarfs/internal/string_table.h>
#include <dwarfs/internal/worker_group.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/entry.h>
#include <dwarfs/writer/internal/file_scanner.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
#include <dwarfs/writer/internal/fragment_chunkable.h>
#include <dwarfs/writer/internal/global_entry_data.h>
#include <dwarfs/writer/internal/inode.h>

View File

@ -49,7 +49,7 @@
#include <dwarfs/writer/segmenter.h>
#include <dwarfs/writer/writer_progress.h>
#include <dwarfs/internal/block_data.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/chunkable.h>
#include <dwarfs/writer/internal/cyclic_hash.h>
@ -60,8 +60,6 @@ namespace dwarfs::writer {
namespace internal {
using namespace dwarfs::internal;
namespace {
/**

View File

@ -80,9 +80,10 @@
#include <dwarfs/writer/console_writer.h>
#include <dwarfs/writer/entry_factory.h>
#include <dwarfs/writer/filesystem_block_category_resolver.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/filter_debug.h>
#include <dwarfs/writer/fragment_order_parser.h>
#include <dwarfs/writer/rewrite_filesystem.h>
#include <dwarfs/writer/rule_based_entry_filter.h>
#include <dwarfs/writer/scanner.h>
#include <dwarfs/writer/segmenter_factory.h>
@ -1168,7 +1169,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
}
std::optional<reader::filesystem_v2> input_filesystem;
std::shared_ptr<category_resolver> cat_resolver;
std::shared_ptr<writer::category_resolver> cat_resolver;
if (recompress) {
input_filesystem.emplace(
@ -1269,7 +1270,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
std::numeric_limits<size_t>::max(),
compress_niceness);
std::optional<filesystem_writer> fsw;
std::optional<writer::filesystem_writer> fsw;
try {
std::ostream& fsw_os =
@ -1280,9 +1281,8 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
},
[&](std::ostringstream& oss) -> std::ostream& { return oss; }};
fsw = writer::filesystem_writer_factory::create(
fsw_os, lgr, compress_pool, prog, schema_bc, metadata_bc, history_bc,
fswopts, header_ifs ? &header_ifs->is() : nullptr);
fsw.emplace(fsw_os, lgr, compress_pool, prog, schema_bc, metadata_bc,
history_bc, fswopts, header_ifs ? &header_ifs->is() : nullptr);
writer::categorized_option<block_compressor> compression_opt;
writer::contextual_option_parser cop("--compression", compression_opt, cp,
@ -1308,7 +1308,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
if (recompress) {
compression_opt.visit_contextual(
[catres = cat_resolver, &fsw](auto cat, block_compressor const& bc) {
[&fsw](auto cat, block_compressor const& bc) {
fsw->add_category_compressor(cat, bc);
});
} else {
@ -1335,8 +1335,8 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
try {
if (recompress) {
input_filesystem->rewrite(*fsw, *cat_resolver, rw_opts);
compress_pool.wait();
writer::rewrite_filesystem(lgr, *input_filesystem, *fsw, *cat_resolver,
rw_opts);
} else {
writer::segmenter_factory sf(lgr, prog, options.inode.categorizer_mgr,
sf_config);

View File

@ -46,7 +46,8 @@
#include <dwarfs/thread_pool.h>
#include <dwarfs/vfs_stat.h>
#include <dwarfs/writer/filesystem_block_category_resolver.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/rewrite_filesystem.h>
#include <dwarfs/writer/writer_progress.h>
#include "mmap_mock.h"
@ -1126,12 +1127,11 @@ TEST_P(rewrite, filesystem_rewrite) {
reader::filesystem_v2 fs(lgr, os, mm, fsopts);
writer::filesystem_block_category_resolver resolver(
fs.get_all_block_categories());
fs.rewrite(fsw, resolver, opts);
writer::rewrite_filesystem(lgr, fs, fsw, resolver, opts);
};
{
auto fsw = writer::filesystem_writer_factory::create(rewritten, lgr, pool,
prog, bc, bc, bc);
writer::filesystem_writer fsw(rewritten, lgr, pool, prog, bc, bc, bc);
fsw.add_default_compressor(bc);
auto mm = std::make_shared<mmap>(filename);
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss));
@ -1153,8 +1153,8 @@ TEST_P(rewrite, filesystem_rewrite) {
{
std::istringstream hdr_iss(format_sh);
filesystem_writer_options fsw_opts;
auto fsw = writer::filesystem_writer_factory::create(
rewritten, lgr, pool, prog, bc, bc, bc, fsw_opts, &hdr_iss);
writer::filesystem_writer fsw(rewritten, lgr, pool, prog, bc, bc, bc,
fsw_opts, &hdr_iss);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<mmap>(filename));
}
@ -1179,8 +1179,8 @@ TEST_P(rewrite, filesystem_rewrite) {
{
std::istringstream hdr_iss("D");
filesystem_writer_options fsw_opts;
auto fsw = writer::filesystem_writer_factory::create(
rewritten2, lgr, pool, prog, bc, bc, bc, fsw_opts, &hdr_iss);
writer::filesystem_writer fsw(rewritten2, lgr, pool, prog, bc, bc, bc,
fsw_opts, &hdr_iss);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten.str()));
}
@ -1197,8 +1197,7 @@ TEST_P(rewrite, filesystem_rewrite) {
std::ostringstream rewritten3;
{
auto fsw = writer::filesystem_writer_factory::create(rewritten3, lgr, pool,
prog, bc, bc, bc);
writer::filesystem_writer fsw(rewritten3, lgr, pool, prog, bc, bc, bc);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten2.str()));
}
@ -1217,8 +1216,8 @@ TEST_P(rewrite, filesystem_rewrite) {
{
filesystem_writer_options fsw_opts;
fsw_opts.remove_header = true;
auto fsw = writer::filesystem_writer_factory::create(
rewritten4, lgr, pool, prog, bc, bc, bc, fsw_opts);
writer::filesystem_writer fsw(rewritten4, lgr, pool, prog, bc, bc, bc,
fsw_opts);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten3.str()));
}
@ -1237,8 +1236,8 @@ TEST_P(rewrite, filesystem_rewrite) {
{
filesystem_writer_options fsw_opts;
fsw_opts.no_section_index = true;
auto fsw = writer::filesystem_writer_factory::create(
rewritten5, lgr, pool, prog, bc, bc, bc, fsw_opts);
writer::filesystem_writer fsw(rewritten5, lgr, pool, prog, bc, bc, bc,
fsw_opts);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten4.str()));
}

View File

@ -34,7 +34,7 @@
#include <dwarfs/thread_pool.h>
#include <dwarfs/vfs_stat.h>
#include <dwarfs/writer/entry_factory.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/scanner.h>
#include <dwarfs/writer/segmenter_factory.h>
#include <dwarfs/writer/writer_progress.h>
@ -131,8 +131,7 @@ std::string make_filesystem(::benchmark::State const& state) {
std::ostringstream oss;
block_compressor bc("null");
auto fsw = writer::filesystem_writer_factory::create(oss, lgr, pool, prog, bc,
bc, bc);
writer::filesystem_writer fsw(oss, lgr, pool, prog, bc, bc, bc);
fsw.add_default_compressor(bc);
s.scan(fsw, "", prog);

View File

@ -46,7 +46,7 @@
#include <dwarfs/thread_pool.h>
#include <dwarfs/vfs_stat.h>
#include <dwarfs/writer/entry_factory.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/filter_debug.h>
#include <dwarfs/writer/rule_based_entry_filter.h>
#include <dwarfs/writer/scanner.h>
@ -115,8 +115,7 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::ostringstream oss;
block_compressor bc(compression);
auto fsw = writer::filesystem_writer_factory::create(oss, lgr, pool, *prog,
bc, bc, bc);
writer::filesystem_writer fsw(oss, lgr, pool, *prog, bc, bc, bc);
fsw.add_default_compressor(bc);
s.scan(fsw, std::filesystem::path("/"), *prog, input_list);
@ -981,8 +980,7 @@ class filter_test
block_compressor bc("null");
std::ostringstream null;
auto fsw = writer::filesystem_writer_factory::create(null, lgr, pool, prog,
bc, bc, bc);
writer::filesystem_writer fsw(null, lgr, pool, prog, bc, bc, bc);
s.scan(fsw, std::filesystem::path("/"), prog);
return oss.str();

View File

@ -26,7 +26,7 @@
#include <dwarfs/block_compressor.h>
#include <dwarfs/thread_pool.h>
#include <dwarfs/writer/filesystem_writer_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/writer_progress.h>
#include "test_helpers.h"
@ -37,7 +37,7 @@ using namespace dwarfs;
namespace fs = std::filesystem;
TEST(filesystem_writer, compression_metadata_requirements) {
using writer::filesystem_writer_factory;
using writer::filesystem_writer;
test::test_logger lgr;
auto os = test::os_access_mock::create_test_instance();
@ -47,16 +47,15 @@ TEST(filesystem_writer, compression_metadata_requirements) {
block_compressor bcnull("null");
EXPECT_NO_THROW(filesystem_writer_factory::create(devnull, lgr, pool, prog,
bcnull, bcnull, bcnull));
EXPECT_NO_THROW(
filesystem_writer(devnull, lgr, pool, prog, bcnull, bcnull, bcnull));
#ifdef DWARFS_HAVE_FLAC
block_compressor bcflac("flac:level=1");
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcflac,
bcnull, bcnull);
filesystem_writer(devnull, lgr, pool, prog, bcflac, bcnull, bcnull);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'flac [level=1]' for schema compression because "
@ -65,8 +64,7 @@ TEST(filesystem_writer, compression_metadata_requirements) {
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcnull,
bcflac, bcnull);
filesystem_writer(devnull, lgr, pool, prog, bcnull, bcflac, bcnull);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'flac [level=1]' for metadata compression because "
@ -75,8 +73,7 @@ TEST(filesystem_writer, compression_metadata_requirements) {
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcnull,
bcnull, bcflac);
filesystem_writer(devnull, lgr, pool, prog, bcnull, bcnull, bcflac);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'flac [level=1]' for history compression because "
@ -89,8 +86,7 @@ TEST(filesystem_writer, compression_metadata_requirements) {
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcrice,
bcnull, bcnull);
filesystem_writer(devnull, lgr, pool, prog, bcrice, bcnull, bcnull);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'ricepp [block_size=128]' for schema compression because "
@ -99,8 +95,7 @@ TEST(filesystem_writer, compression_metadata_requirements) {
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcnull,
bcrice, bcnull);
filesystem_writer(devnull, lgr, pool, prog, bcnull, bcrice, bcnull);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'ricepp [block_size=128]' for metadata compression "
@ -110,8 +105,7 @@ TEST(filesystem_writer, compression_metadata_requirements) {
EXPECT_THAT(
[&] {
filesystem_writer_factory::create(devnull, lgr, pool, prog, bcnull,
bcnull, bcrice);
filesystem_writer(devnull, lgr, pool, prog, bcnull, bcnull, bcrice);
},
testing::ThrowsMessage<dwarfs::runtime_error>(testing::HasSubstr(
"cannot use 'ricepp [block_size=128]' for history compression "

View File

@ -133,7 +133,7 @@ TEST_F(fits_categorizer, unused_lsb_count_test) {
auto metadata_category = catmgr->category_value("fits/metadata").value();
auto image_category = catmgr->category_value("fits/image").value();
std::map<fragment_category, std::set<unsigned>> categories;
std::map<writer::fragment_category, std::set<unsigned>> categories;
for (size_t offset = 0; offset < 64; offset += 2) {
std::span<uint8_t> fits{data.data() + offset, 2 * 2880};

View File

@ -29,9 +29,9 @@
#include <fmt/format.h>
#include <dwarfs/fragment_category.h>
#include <dwarfs/writer/fragment_category.h>
using namespace dwarfs;
using namespace dwarfs::writer;
TEST(fragment_category_test, basic) {
fragment_category c;

View File

@ -28,7 +28,7 @@
#include <dwarfs/writer/segmenter.h>
#include <dwarfs/writer/writer_progress.h>
#include <dwarfs/internal/block_data.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/chunkable.h>
@ -145,12 +145,13 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
dwarfs::writer::writer_progress prog;
auto blkmgr = std::make_shared<dwarfs::writer::internal::block_manager>();
std::vector<std::shared_ptr<dwarfs::internal::block_data>> written;
std::vector<std::shared_ptr<dwarfs::writer::internal::block_data>> written;
dwarfs::writer::segmenter seg(
lgr, prog, blkmgr, cfg, cc, total_size,
[&written, blkmgr](std::shared_ptr<dwarfs::internal::block_data> blk,
auto logical_block_num) {
[&written,
blkmgr](std::shared_ptr<dwarfs::writer::internal::block_data> blk,
auto logical_block_num) {
auto physical_block_num = written.size();
written.push_back(blk);
blkmgr->set_written_block(logical_block_num, physical_block_num, 0);