mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 13:30:47 -04:00
refactor: factor out metadata_builder
from scanner
This commit is contained in:
parent
8455dc0229
commit
dad4ea9fb7
@ -140,6 +140,7 @@ add_library(
|
|||||||
src/writer/internal/inode_element_view.cpp
|
src/writer/internal/inode_element_view.cpp
|
||||||
src/writer/internal/inode_manager.cpp
|
src/writer/internal/inode_manager.cpp
|
||||||
src/writer/internal/inode_ordering.cpp
|
src/writer/internal/inode_ordering.cpp
|
||||||
|
src/writer/internal/metadata_builder.cpp
|
||||||
src/writer/internal/metadata_freezer.cpp
|
src/writer/internal/metadata_freezer.cpp
|
||||||
src/writer/internal/nilsimsa.cpp
|
src/writer/internal/nilsimsa.cpp
|
||||||
src/writer/internal/progress.cpp
|
src/writer/internal/progress.cpp
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
|
|
||||||
namespace dwarfs::writer {
|
namespace dwarfs::writer {
|
||||||
|
|
||||||
struct scanner_options;
|
struct metadata_options;
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
@ -46,8 +46,8 @@ class global_entry_data {
|
|||||||
|
|
||||||
enum class timestamp_type { ATIME, MTIME, CTIME };
|
enum class timestamp_type { ATIME, MTIME, CTIME };
|
||||||
|
|
||||||
global_entry_data(scanner_options const& options)
|
global_entry_data(metadata_options const& options)
|
||||||
: options_(options) {}
|
: options_{options} {}
|
||||||
|
|
||||||
void add_uid(uid_type uid);
|
void add_uid(uid_type uid);
|
||||||
void add_gid(gid_type gid);
|
void add_gid(gid_type gid);
|
||||||
@ -108,7 +108,7 @@ class global_entry_data {
|
|||||||
gid_type next_gid_index_{0};
|
gid_type next_gid_index_{0};
|
||||||
mode_type next_mode_index_{0};
|
mode_type next_mode_index_{0};
|
||||||
uint64_t timestamp_base_{std::numeric_limits<uint64_t>::max()};
|
uint64_t timestamp_base_{std::numeric_limits<uint64_t>::max()};
|
||||||
scanner_options const& options_;
|
metadata_options const& options_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
141
include/dwarfs/writer/internal/metadata_builder.h
Normal file
141
include/dwarfs/writer/internal/metadata_builder.h
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <span>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
|
||||||
|
class logger;
|
||||||
|
|
||||||
|
namespace writer {
|
||||||
|
struct metadata_options;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace thrift::metadata {
|
||||||
|
class metadata;
|
||||||
|
} // namespace thrift::metadata
|
||||||
|
|
||||||
|
namespace writer::internal {
|
||||||
|
|
||||||
|
class global_entry_data;
|
||||||
|
class inode_manager;
|
||||||
|
class block_manager;
|
||||||
|
class dir;
|
||||||
|
|
||||||
|
class metadata_builder {
|
||||||
|
public:
|
||||||
|
metadata_builder(logger& lgr, metadata_options const& options);
|
||||||
|
metadata_builder(logger& lgr, thrift::metadata::metadata const& md,
|
||||||
|
metadata_options const& options);
|
||||||
|
metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
|
||||||
|
metadata_options const& options);
|
||||||
|
~metadata_builder();
|
||||||
|
|
||||||
|
void set_devices(std::vector<uint64_t> devices) {
|
||||||
|
impl_->set_devices(std::move(devices));
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_symlink_table_size(size_t size) {
|
||||||
|
impl_->set_symlink_table_size(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_block_size(uint32_t block_size) {
|
||||||
|
impl_->set_block_size(block_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_total_fs_size(uint64_t total_fs_size) {
|
||||||
|
impl_->set_total_fs_size(total_fs_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_total_hardlink_size(uint64_t total_hardlink_size) {
|
||||||
|
impl_->set_total_hardlink_size(total_hardlink_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_shared_files_table(std::vector<uint32_t> shared_files) {
|
||||||
|
impl_->set_shared_files_table(std::move(shared_files));
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_category_names(std::vector<std::string> category_names) {
|
||||||
|
impl_->set_category_names(std::move(category_names));
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_block_categories(std::vector<uint32_t> block_categories) {
|
||||||
|
impl_->set_block_categories(std::move(block_categories));
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_symlink_table_entry(size_t index, uint32_t entry) {
|
||||||
|
impl_->add_symlink_table_entry(index, entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gather_chunks(inode_manager const& im, block_manager const& bm,
|
||||||
|
size_t chunk_count) {
|
||||||
|
impl_->gather_chunks(im, bm, chunk_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
|
||||||
|
uint32_t num_inodes) {
|
||||||
|
impl_->gather_entries(dirs, ge_data, num_inodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gather_global_entry_data(global_entry_data const& ge_data) {
|
||||||
|
impl_->gather_global_entry_data(ge_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
thrift::metadata::metadata const& build() { return impl_->build(); }
|
||||||
|
|
||||||
|
class impl {
|
||||||
|
public:
|
||||||
|
virtual ~impl() = default;
|
||||||
|
|
||||||
|
virtual void set_devices(std::vector<uint64_t> devices) = 0;
|
||||||
|
virtual void set_symlink_table_size(size_t size) = 0;
|
||||||
|
virtual void set_block_size(uint32_t block_size) = 0;
|
||||||
|
virtual void set_total_fs_size(uint64_t total_fs_size) = 0;
|
||||||
|
virtual void set_total_hardlink_size(uint64_t total_hardlink_size) = 0;
|
||||||
|
virtual void set_shared_files_table(std::vector<uint32_t> shared_files) = 0;
|
||||||
|
virtual void
|
||||||
|
set_category_names(std::vector<std::string> category_names) = 0;
|
||||||
|
virtual void
|
||||||
|
set_block_categories(std::vector<uint32_t> block_categories) = 0;
|
||||||
|
virtual void add_symlink_table_entry(size_t index, uint32_t entry) = 0;
|
||||||
|
virtual void gather_chunks(inode_manager const& im, block_manager const& bm,
|
||||||
|
size_t chunk_count) = 0;
|
||||||
|
virtual void
|
||||||
|
gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
|
||||||
|
uint32_t num_inodes) = 0;
|
||||||
|
virtual void gather_global_entry_data(global_entry_data const& ge_data) = 0;
|
||||||
|
|
||||||
|
virtual thrift::metadata::metadata const& build() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<impl> impl_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace writer::internal
|
||||||
|
|
||||||
|
} // namespace dwarfs
|
59
include/dwarfs/writer/metadata_options.h
Normal file
59
include/dwarfs/writer/metadata_options.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <dwarfs/file_stat.h>
|
||||||
|
#include <dwarfs/history_config.h>
|
||||||
|
#include <dwarfs/writer/inode_options.h>
|
||||||
|
|
||||||
|
namespace dwarfs::writer {
|
||||||
|
|
||||||
|
class entry_interface;
|
||||||
|
|
||||||
|
struct metadata_options {
|
||||||
|
std::optional<file_stat::uid_type> uid{};
|
||||||
|
std::optional<file_stat::gid_type> gid{};
|
||||||
|
std::optional<uint64_t> timestamp{};
|
||||||
|
bool keep_all_times{false};
|
||||||
|
uint32_t time_resolution_sec{1};
|
||||||
|
bool pack_chunk_table{false};
|
||||||
|
bool pack_directories{false};
|
||||||
|
bool pack_shared_files_table{false};
|
||||||
|
bool plain_names_table{false};
|
||||||
|
bool pack_names{false};
|
||||||
|
bool pack_names_index{false};
|
||||||
|
bool plain_symlinks_table{false};
|
||||||
|
bool pack_symlinks{false};
|
||||||
|
bool pack_symlinks_index{false};
|
||||||
|
bool force_pack_string_tables{false};
|
||||||
|
bool no_create_timestamp{false};
|
||||||
|
size_t inode_size_cache_min_chunk_count{128};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace dwarfs::writer
|
@ -33,6 +33,7 @@
|
|||||||
#include <dwarfs/file_stat.h>
|
#include <dwarfs/file_stat.h>
|
||||||
#include <dwarfs/history_config.h>
|
#include <dwarfs/history_config.h>
|
||||||
#include <dwarfs/writer/inode_options.h>
|
#include <dwarfs/writer/inode_options.h>
|
||||||
|
#include <dwarfs/writer/metadata_options.h>
|
||||||
|
|
||||||
namespace dwarfs::writer {
|
namespace dwarfs::writer {
|
||||||
|
|
||||||
@ -40,33 +41,17 @@ class entry_interface;
|
|||||||
|
|
||||||
struct scanner_options {
|
struct scanner_options {
|
||||||
std::optional<std::string> file_hash_algorithm{"xxh3-128"};
|
std::optional<std::string> file_hash_algorithm{"xxh3-128"};
|
||||||
std::optional<file_stat::uid_type> uid;
|
|
||||||
std::optional<file_stat::gid_type> gid;
|
|
||||||
std::optional<uint64_t> timestamp;
|
|
||||||
bool keep_all_times{false};
|
|
||||||
bool remove_empty_dirs{false};
|
bool remove_empty_dirs{false};
|
||||||
bool with_devices{false};
|
bool with_devices{false};
|
||||||
bool with_specials{false};
|
bool with_specials{false};
|
||||||
uint32_t time_resolution_sec{1};
|
|
||||||
inode_options inode;
|
inode_options inode;
|
||||||
bool pack_chunk_table{false};
|
|
||||||
bool pack_directories{false};
|
|
||||||
bool pack_shared_files_table{false};
|
|
||||||
bool plain_names_table{false};
|
|
||||||
bool pack_names{false};
|
|
||||||
bool pack_names_index{false};
|
|
||||||
bool plain_symlinks_table{false};
|
|
||||||
bool pack_symlinks{false};
|
|
||||||
bool pack_symlinks_index{false};
|
|
||||||
bool force_pack_string_tables{false};
|
|
||||||
bool no_create_timestamp{false};
|
|
||||||
std::optional<std::function<void(bool, writer::entry_interface const&)>>
|
std::optional<std::function<void(bool, writer::entry_interface const&)>>
|
||||||
debug_filter_function;
|
debug_filter_function;
|
||||||
size_t num_segmenter_workers{1};
|
size_t num_segmenter_workers{1};
|
||||||
bool enable_history{true};
|
bool enable_history{true};
|
||||||
std::optional<std::vector<std::string>> command_line_arguments;
|
std::optional<std::vector<std::string>> command_line_arguments;
|
||||||
history_config history;
|
history_config history;
|
||||||
size_t inode_size_cache_min_chunk_count{128};
|
metadata_options metadata;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace dwarfs::writer
|
} // namespace dwarfs::writer
|
||||||
|
@ -57,13 +57,11 @@ std::vector<T> global_entry_data::get_vector(map_type<T, U> const& map) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto global_entry_data::get_uids() const -> std::vector<uid_type> {
|
auto global_entry_data::get_uids() const -> std::vector<uid_type> {
|
||||||
return options_.uid ? std::vector<uid_type>{*options_.uid}
|
return get_vector(uids_);
|
||||||
: get_vector(uids_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto global_entry_data::get_gids() const -> std::vector<gid_type> {
|
auto global_entry_data::get_gids() const -> std::vector<gid_type> {
|
||||||
return options_.gid ? std::vector<gid_type>{*options_.gid}
|
return get_vector(gids_);
|
||||||
: get_vector(gids_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto global_entry_data::get_modes() const -> std::vector<mode_type> {
|
auto global_entry_data::get_modes() const -> std::vector<mode_type> {
|
||||||
|
351
src/writer/internal/metadata_builder.cpp
Normal file
351
src/writer/internal/metadata_builder.cpp
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
#include <ctime>
#include <filesystem>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
|
||||||
|
|
||||||
|
#include <dwarfs/logger.h>
|
||||||
|
#include <dwarfs/version.h>
|
||||||
|
|
||||||
|
#include <dwarfs/writer/metadata_options.h>
|
||||||
|
|
||||||
|
#include <dwarfs/internal/features.h>
|
||||||
|
#include <dwarfs/internal/string_table.h>
|
||||||
|
#include <dwarfs/writer/internal/block_manager.h>
|
||||||
|
#include <dwarfs/writer/internal/entry.h>
|
||||||
|
#include <dwarfs/writer/internal/global_entry_data.h>
|
||||||
|
#include <dwarfs/writer/internal/inode_manager.h>
|
||||||
|
#include <dwarfs/writer/internal/metadata_builder.h>
|
||||||
|
|
||||||
|
#include <dwarfs/gen-cpp2/metadata_types.h>
|
||||||
|
|
||||||
|
namespace dwarfs::writer::internal {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using namespace dwarfs::internal;
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
class metadata_builder_ final : public metadata_builder::impl {
|
||||||
|
public:
|
||||||
|
metadata_builder_(logger& lgr, metadata_options const& options)
|
||||||
|
: LOG_PROXY_INIT(lgr)
|
||||||
|
, options_{options} {}
|
||||||
|
|
||||||
|
metadata_builder_(logger& lgr, thrift::metadata::metadata const& md,
|
||||||
|
metadata_options const& options)
|
||||||
|
: LOG_PROXY_INIT(lgr)
|
||||||
|
, md_{md}
|
||||||
|
, options_{options} {}
|
||||||
|
|
||||||
|
metadata_builder_(logger& lgr, thrift::metadata::metadata&& md,
|
||||||
|
metadata_options const& options)
|
||||||
|
: LOG_PROXY_INIT(lgr)
|
||||||
|
, md_{std::move(md)}
|
||||||
|
, options_{options} {}
|
||||||
|
|
||||||
|
void set_devices(std::vector<uint64_t> devices) override {
|
||||||
|
md_.devices() = std::move(devices);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_symlink_table_size(size_t size) override {
|
||||||
|
md_.symlink_table()->resize(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_block_size(uint32_t block_size) override {
|
||||||
|
md_.block_size() = block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_total_fs_size(uint64_t total_fs_size) override {
|
||||||
|
md_.total_fs_size() = total_fs_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_total_hardlink_size(uint64_t total_hardlink_size) override {
|
||||||
|
md_.total_hardlink_size() = total_hardlink_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_shared_files_table(std::vector<uint32_t> shared_files) override {
|
||||||
|
md_.shared_files_table() = std::move(shared_files);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_category_names(std::vector<std::string> category_names) override {
|
||||||
|
md_.category_names() = std::move(category_names);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_block_categories(std::vector<uint32_t> block_categories) override {
|
||||||
|
md_.block_categories() = std::move(block_categories);
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_symlink_table_entry(size_t index, uint32_t entry) override {
|
||||||
|
DWARFS_NOTHROW(md_.symlink_table()->at(index)) = entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gather_chunks(inode_manager const& im, block_manager const& bm,
|
||||||
|
size_t chunk_count) override;
|
||||||
|
|
||||||
|
void gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
|
||||||
|
uint32_t num_inodes) override;
|
||||||
|
|
||||||
|
void gather_global_entry_data(global_entry_data const& ge_data) override;
|
||||||
|
|
||||||
|
thrift::metadata::metadata const& build() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
thrift::metadata::inode_size_cache build_inode_size_cache() const;
|
||||||
|
|
||||||
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
|
thrift::metadata::metadata md_;
|
||||||
|
feature_set features_;
|
||||||
|
metadata_options const& options_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
thrift::metadata::inode_size_cache
|
||||||
|
metadata_builder_<LoggerPolicy>::build_inode_size_cache() const {
|
||||||
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
|
|
||||||
|
thrift::metadata::inode_size_cache cache;
|
||||||
|
cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count;
|
||||||
|
|
||||||
|
auto const& chunk_table = md_.chunk_table().value();
|
||||||
|
auto const& chunks = md_.chunks().value();
|
||||||
|
|
||||||
|
for (size_t ino = 1; ino < chunk_table.size() - 1; ++ino) {
|
||||||
|
auto const begin = chunk_table[ino];
|
||||||
|
auto const end = chunk_table[ino + 1];
|
||||||
|
auto const num_chunks = end - begin;
|
||||||
|
|
||||||
|
if (num_chunks >= options_.inode_size_cache_min_chunk_count) {
|
||||||
|
uint64_t size = 0;
|
||||||
|
|
||||||
|
for (uint32_t ix = begin; ix < end; ++ix) {
|
||||||
|
auto const& chunk = chunks[ix];
|
||||||
|
size += chunk.size().value();
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG << "caching size " << size << " for inode " << ino << " with "
|
||||||
|
<< num_chunks << " chunks";
|
||||||
|
|
||||||
|
cache.lookup()->emplace(ino, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tv << "building inode size cache...";
|
||||||
|
|
||||||
|
return cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void metadata_builder_<LoggerPolicy>::gather_chunks(inode_manager const& im,
|
||||||
|
block_manager const& bm,
|
||||||
|
size_t chunk_count) {
|
||||||
|
md_.chunk_table()->resize(im.count() + 1);
|
||||||
|
md_.chunks().value().reserve(chunk_count);
|
||||||
|
|
||||||
|
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
|
||||||
|
auto const total_chunks = md_.chunks()->size();
|
||||||
|
DWARFS_NOTHROW(md_.chunk_table()->at(ino->num())) = total_chunks;
|
||||||
|
if (!ino->append_chunks_to(md_.chunks().value())) {
|
||||||
|
std::ostringstream oss;
|
||||||
|
for (auto fp : ino->all()) {
|
||||||
|
oss << "\n " << fp->path_as_string();
|
||||||
|
}
|
||||||
|
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
|
||||||
|
<< ", the following files will be empty:" << oss.str();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
bm.map_logical_blocks(md_.chunks().value());
|
||||||
|
|
||||||
|
// insert dummy inode to help determine number of chunks per inode
|
||||||
|
DWARFS_NOTHROW(md_.chunk_table()->at(im.count())) = md_.chunks()->size();
|
||||||
|
|
||||||
|
LOG_DEBUG << "total number of unique files: " << im.count();
|
||||||
|
LOG_DEBUG << "total number of chunks: " << md_.chunks()->size();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void metadata_builder_<LoggerPolicy>::gather_entries(
|
||||||
|
std::span<dir*> dirs, global_entry_data const& ge_data,
|
||||||
|
uint32_t num_inodes) {
|
||||||
|
md_.dir_entries() = std::vector<thrift::metadata::dir_entry>();
|
||||||
|
md_.inodes()->resize(num_inodes);
|
||||||
|
md_.directories()->reserve(dirs.size() + 1);
|
||||||
|
|
||||||
|
for (auto p : dirs) {
|
||||||
|
if (!p->has_parent()) {
|
||||||
|
p->set_entry_index(md_.dir_entries()->size());
|
||||||
|
p->pack_entry(md_, ge_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
p->pack(md_, ge_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
thrift::metadata::directory dummy;
|
||||||
|
dummy.parent_entry() = 0;
|
||||||
|
dummy.first_entry() = md_.dir_entries()->size();
|
||||||
|
dummy.self_entry() = 0;
|
||||||
|
md_.directories()->push_back(dummy);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void metadata_builder_<LoggerPolicy>::gather_global_entry_data(
|
||||||
|
global_entry_data const& ge_data) {
|
||||||
|
md_.names() = ge_data.get_names();
|
||||||
|
|
||||||
|
md_.symlinks() = ge_data.get_symlinks();
|
||||||
|
|
||||||
|
md_.uids() = options_.uid ? std::vector<file_stat::uid_type>{*options_.uid}
|
||||||
|
: ge_data.get_uids();
|
||||||
|
|
||||||
|
md_.gids() = options_.gid ? std::vector<file_stat::gid_type>{*options_.gid}
|
||||||
|
: ge_data.get_gids();
|
||||||
|
|
||||||
|
md_.modes() = ge_data.get_modes();
|
||||||
|
|
||||||
|
md_.timestamp_base() = ge_data.get_timestamp_base();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() {
|
||||||
|
LOG_VERBOSE << "building metadata";
|
||||||
|
|
||||||
|
thrift::metadata::fs_options fsopts;
|
||||||
|
fsopts.mtime_only() = !options_.keep_all_times;
|
||||||
|
if (options_.time_resolution_sec > 1) {
|
||||||
|
fsopts.time_resolution_sec() = options_.time_resolution_sec;
|
||||||
|
}
|
||||||
|
fsopts.packed_chunk_table() = options_.pack_chunk_table;
|
||||||
|
fsopts.packed_directories() = options_.pack_directories;
|
||||||
|
fsopts.packed_shared_files_table() = options_.pack_shared_files_table;
|
||||||
|
|
||||||
|
if (options_.pack_directories) {
|
||||||
|
// pack directories
|
||||||
|
uint32_t last_first_entry = 0;
|
||||||
|
|
||||||
|
for (auto& d : md_.directories().value()) {
|
||||||
|
d.parent_entry() = 0; // this will be recovered
|
||||||
|
d.self_entry() = 0; // this will be recovered
|
||||||
|
auto delta = d.first_entry().value() - last_first_entry;
|
||||||
|
last_first_entry = d.first_entry().value();
|
||||||
|
d.first_entry() = delta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
md_.reg_file_size_cache() = build_inode_size_cache();
|
||||||
|
|
||||||
|
if (options_.pack_chunk_table) {
|
||||||
|
// delta-compress chunk table
|
||||||
|
std::adjacent_difference(md_.chunk_table()->begin(),
|
||||||
|
md_.chunk_table()->end(),
|
||||||
|
md_.chunk_table()->begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options_.pack_shared_files_table) {
|
||||||
|
if (!md_.shared_files_table()->empty()) {
|
||||||
|
auto& sf = md_.shared_files_table().value();
|
||||||
|
DWARFS_CHECK(std::ranges::is_sorted(sf),
|
||||||
|
"shared files vector not sorted");
|
||||||
|
std::vector<uint32_t> compressed;
|
||||||
|
compressed.reserve(sf.back() + 1);
|
||||||
|
|
||||||
|
uint32_t count = 0;
|
||||||
|
uint32_t index = 0;
|
||||||
|
for (auto i : sf) {
|
||||||
|
if (i == index) {
|
||||||
|
++count;
|
||||||
|
} else {
|
||||||
|
++index;
|
||||||
|
DWARFS_CHECK(i == index, "inconsistent shared files vector");
|
||||||
|
DWARFS_CHECK(count >= 2, "unique file in shared files vector");
|
||||||
|
compressed.emplace_back(count - 2);
|
||||||
|
count = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compressed.emplace_back(count - 2);
|
||||||
|
|
||||||
|
DWARFS_CHECK(compressed.size() == sf.back() + 1,
|
||||||
|
"unexpected compressed vector size");
|
||||||
|
|
||||||
|
sf.swap(compressed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!options_.plain_names_table) {
|
||||||
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
md_.compact_names() = string_table::pack(
|
||||||
|
md_.names().value(), string_table::pack_options(
|
||||||
|
options_.pack_names, options_.pack_names_index,
|
||||||
|
options_.force_pack_string_tables));
|
||||||
|
thrift::metadata::metadata tmp;
|
||||||
|
md_.names().copy_from(tmp.names());
|
||||||
|
ti << "saving names table...";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!options_.plain_symlinks_table) {
|
||||||
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
md_.compact_symlinks() = string_table::pack(
|
||||||
|
md_.symlinks().value(),
|
||||||
|
string_table::pack_options(options_.pack_symlinks,
|
||||||
|
options_.pack_symlinks_index,
|
||||||
|
options_.force_pack_string_tables));
|
||||||
|
thrift::metadata::metadata tmp;
|
||||||
|
md_.symlinks().copy_from(tmp.symlinks());
|
||||||
|
ti << "saving symlinks table...";
|
||||||
|
}
|
||||||
|
|
||||||
|
md_.options() = fsopts;
|
||||||
|
md_.features() = features_.get();
|
||||||
|
md_.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID;
|
||||||
|
if (!options_.no_create_timestamp) {
|
||||||
|
md_.create_timestamp() = std::time(nullptr);
|
||||||
|
}
|
||||||
|
md_.preferred_path_separator() =
|
||||||
|
static_cast<uint32_t>(std::filesystem::path::preferred_separator);
|
||||||
|
|
||||||
|
return md_;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Each constructor creates the logger-policy specific implementation object
// and forwards its arguments to it.

metadata_builder::metadata_builder(logger& lgr, metadata_options const& options)
    : impl_(
          make_unique_logging_object<impl, metadata_builder_, logger_policies>(
              lgr, options)) {}

metadata_builder::metadata_builder(logger& lgr,
                                   thrift::metadata::metadata const& md,
                                   metadata_options const& options)
    : impl_(
          make_unique_logging_object<impl, metadata_builder_, logger_policies>(
              lgr, md, options)) {}

metadata_builder::metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
                                   metadata_options const& options)
    : impl_(
          make_unique_logging_object<impl, metadata_builder_, logger_policies>(
              lgr, std::move(md), options)) {}

// Defined here, where the implementation type is known.
metadata_builder::~metadata_builder() = default;
|
||||||
|
|
||||||
|
} // namespace dwarfs::writer::internal
|
@ -62,8 +62,6 @@
|
|||||||
#include <dwarfs/writer/segmenter_factory.h>
|
#include <dwarfs/writer/segmenter_factory.h>
|
||||||
#include <dwarfs/writer/writer_progress.h>
|
#include <dwarfs/writer/writer_progress.h>
|
||||||
|
|
||||||
#include <dwarfs/internal/features.h>
|
|
||||||
#include <dwarfs/internal/string_table.h>
|
|
||||||
#include <dwarfs/internal/worker_group.h>
|
#include <dwarfs/internal/worker_group.h>
|
||||||
#include <dwarfs/writer/internal/block_manager.h>
|
#include <dwarfs/writer/internal/block_manager.h>
|
||||||
#include <dwarfs/writer/internal/entry.h>
|
#include <dwarfs/writer/internal/entry.h>
|
||||||
@ -73,11 +71,10 @@
|
|||||||
#include <dwarfs/writer/internal/global_entry_data.h>
|
#include <dwarfs/writer/internal/global_entry_data.h>
|
||||||
#include <dwarfs/writer/internal/inode.h>
|
#include <dwarfs/writer/internal/inode.h>
|
||||||
#include <dwarfs/writer/internal/inode_manager.h>
|
#include <dwarfs/writer/internal/inode_manager.h>
|
||||||
|
#include <dwarfs/writer/internal/metadata_builder.h>
|
||||||
#include <dwarfs/writer/internal/metadata_freezer.h>
|
#include <dwarfs/writer/internal/metadata_freezer.h>
|
||||||
#include <dwarfs/writer/internal/progress.h>
|
#include <dwarfs/writer/internal/progress.h>
|
||||||
|
|
||||||
#include <dwarfs/gen-cpp2/metadata_types.h>
|
|
||||||
|
|
||||||
namespace dwarfs::writer {
|
namespace dwarfs::writer {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@ -190,24 +187,7 @@ class save_directories_visitor : public visitor_base {
|
|||||||
|
|
||||||
void visit(dir* p) override { directories_.at(p->inode_num().value()) = p; }
|
void visit(dir* p) override { directories_.at(p->inode_num().value()) = p; }
|
||||||
|
|
||||||
void pack(thrift::metadata::metadata& mv2, global_entry_data& ge_data) {
|
std::span<dir*> get_directories() { return directories_; }
|
||||||
for (auto p : directories_) {
|
|
||||||
if (!p->has_parent()) {
|
|
||||||
p->set_entry_index(mv2.dir_entries()->size());
|
|
||||||
p->pack_entry(mv2, ge_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
p->pack(mv2, ge_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
thrift::metadata::directory dummy;
|
|
||||||
dummy.parent_entry() = 0;
|
|
||||||
dummy.first_entry() = mv2.dir_entries()->size();
|
|
||||||
dummy.self_entry() = 0;
|
|
||||||
mv2.directories()->push_back(dummy);
|
|
||||||
|
|
||||||
directories_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<dir*> directories_;
|
std::vector<dir*> directories_;
|
||||||
@ -232,36 +212,6 @@ class save_shared_files_visitor : public visitor_base {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pack_shared_files() {
|
|
||||||
if (!shared_files_.empty()) {
|
|
||||||
DWARFS_CHECK(std::ranges::is_sorted(shared_files_),
|
|
||||||
"shared files vector not sorted");
|
|
||||||
std::vector<uint32_t> compressed;
|
|
||||||
compressed.reserve(shared_files_.back() + 1);
|
|
||||||
|
|
||||||
uint32_t count = 0;
|
|
||||||
uint32_t index = 0;
|
|
||||||
for (auto i : shared_files_) {
|
|
||||||
if (i == index) {
|
|
||||||
++count;
|
|
||||||
} else {
|
|
||||||
++index;
|
|
||||||
DWARFS_CHECK(i == index, "inconsistent shared files vector");
|
|
||||||
DWARFS_CHECK(count >= 2, "unique file in shared files vector");
|
|
||||||
compressed.emplace_back(count - 2);
|
|
||||||
count = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
compressed.emplace_back(count - 2);
|
|
||||||
|
|
||||||
DWARFS_CHECK(compressed.size() == shared_files_.back() + 1,
|
|
||||||
"unexpected compressed vector size");
|
|
||||||
|
|
||||||
shared_files_.swap(compressed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint32_t>& get_shared_files() { return shared_files_; }
|
std::vector<uint32_t>& get_shared_files() { return shared_files_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -792,17 +742,14 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
global_entry_data ge_data(options_);
|
global_entry_data ge_data(options_.metadata);
|
||||||
thrift::metadata::metadata mv2;
|
metadata_builder mdb(LOG_GET_LOGGER, options_.metadata);
|
||||||
feature_set features;
|
|
||||||
|
|
||||||
mv2.symlink_table()->resize(first_file_inode - first_link_inode);
|
|
||||||
|
|
||||||
LOG_INFO << "assigning device inodes...";
|
LOG_INFO << "assigning device inodes...";
|
||||||
uint32_t first_pipe_inode = first_device_inode;
|
uint32_t first_pipe_inode = first_device_inode;
|
||||||
device_set_inode_visitor devsiv(first_pipe_inode);
|
device_set_inode_visitor devsiv(first_pipe_inode);
|
||||||
root->accept(devsiv);
|
root->accept(devsiv);
|
||||||
mv2.devices() = std::move(devsiv.device_ids());
|
mdb.set_devices(std::move(devsiv.device_ids()));
|
||||||
|
|
||||||
LOG_INFO << "assigning pipe/socket inodes...";
|
LOG_INFO << "assigning pipe/socket inodes...";
|
||||||
uint32_t last_inode = first_pipe_inode;
|
uint32_t last_inode = first_pipe_inode;
|
||||||
@ -811,6 +758,8 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
LOG_INFO << "building metadata...";
|
LOG_INFO << "building metadata...";
|
||||||
|
|
||||||
|
mdb.set_symlink_table_size(first_file_inode - first_link_inode);
|
||||||
|
|
||||||
wg_.add_job([&] {
|
wg_.add_job([&] {
|
||||||
LOG_INFO << "saving names and symlinks...";
|
LOG_INFO << "saving names and symlinks...";
|
||||||
names_and_symlinks_visitor nlv(ge_data);
|
names_and_symlinks_visitor nlv(ge_data);
|
||||||
@ -821,10 +770,10 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
LOG_INFO << "updating name and link indices...";
|
LOG_INFO << "updating name and link indices...";
|
||||||
root->walk([&](entry* ep) {
|
root->walk([&](entry* ep) {
|
||||||
ep->update(ge_data);
|
ep->update(ge_data);
|
||||||
if (auto lp = dynamic_cast<link*>(ep)) {
|
if (auto* lp = dynamic_cast<link*>(ep)) {
|
||||||
DWARFS_NOTHROW(mv2.symlink_table()->at(ep->inode_num().value() -
|
mdb.add_symlink_table_entry(
|
||||||
first_link_inode)) =
|
ep->inode_num().value() - first_link_inode,
|
||||||
ge_data.get_symlink_table_entry(lp->linkname());
|
ge_data.get_symlink_table_entry(lp->linkname()));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@ -955,124 +904,18 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
prog.run_sync([&] { root->clear_name(); });
|
prog.run_sync([&] { root->clear_name(); });
|
||||||
|
|
||||||
LOG_INFO << "saving chunks...";
|
LOG_INFO << "saving chunks...";
|
||||||
mv2.chunk_table()->resize(im.count() + 1);
|
mdb.gather_chunks(im, *blockmgr, prog.chunk_count);
|
||||||
|
|
||||||
auto& size_cache = mv2.reg_file_size_cache().emplace();
|
|
||||||
size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count;
|
|
||||||
|
|
||||||
// TODO: we should be able to start this once all blocks have been
|
|
||||||
// submitted for compression
|
|
||||||
mv2.chunks().value().reserve(prog.chunk_count);
|
|
||||||
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
|
|
||||||
auto const total_chunks = mv2.chunks()->size();
|
|
||||||
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks;
|
|
||||||
if (!ino->append_chunks_to(mv2.chunks().value())) {
|
|
||||||
std::ostringstream oss;
|
|
||||||
for (auto fp : ino->all()) {
|
|
||||||
oss << "\n " << fp->path_as_string();
|
|
||||||
}
|
|
||||||
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
|
|
||||||
<< ", the following files will be empty:" << oss.str();
|
|
||||||
}
|
|
||||||
auto num_inode_chunks = mv2.chunks()->size() - total_chunks;
|
|
||||||
if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) {
|
|
||||||
LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num()
|
|
||||||
<< " with " << num_inode_chunks << " chunks";
|
|
||||||
size_cache.lookup()->emplace(ino->num(), ino->size());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
blockmgr->map_logical_blocks(mv2.chunks().value());
|
|
||||||
|
|
||||||
// insert dummy inode to help determine number of chunks per inode
|
|
||||||
DWARFS_NOTHROW(mv2.chunk_table()->at(im.count())) = mv2.chunks()->size();
|
|
||||||
|
|
||||||
LOG_DEBUG << "total number of unique files: " << im.count();
|
|
||||||
LOG_DEBUG << "total number of chunks: " << mv2.chunks()->size();
|
|
||||||
|
|
||||||
LOG_INFO << "saving directories...";
|
LOG_INFO << "saving directories...";
|
||||||
mv2.dir_entries() = std::vector<thrift::metadata::dir_entry>();
|
|
||||||
mv2.inodes()->resize(last_inode);
|
|
||||||
mv2.directories()->reserve(first_link_inode + 1);
|
|
||||||
save_directories_visitor sdv(first_link_inode);
|
save_directories_visitor sdv(first_link_inode);
|
||||||
root->accept(sdv);
|
root->accept(sdv);
|
||||||
sdv.pack(mv2, ge_data);
|
mdb.gather_entries(sdv.get_directories(), ge_data, last_inode);
|
||||||
|
|
||||||
if (options_.pack_directories) {
|
|
||||||
// pack directories
|
|
||||||
uint32_t last_first_entry = 0;
|
|
||||||
|
|
||||||
for (auto& d : mv2.directories().value()) {
|
|
||||||
d.parent_entry() = 0; // this will be recovered
|
|
||||||
d.self_entry() = 0; // this will be recovered
|
|
||||||
auto delta = d.first_entry().value() - last_first_entry;
|
|
||||||
last_first_entry = d.first_entry().value();
|
|
||||||
d.first_entry() = delta;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options_.pack_chunk_table) {
|
|
||||||
// delta-compress chunk table
|
|
||||||
std::adjacent_difference(mv2.chunk_table()->begin(),
|
|
||||||
mv2.chunk_table()->end(),
|
|
||||||
mv2.chunk_table()->begin());
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_INFO << "saving shared files table...";
|
LOG_INFO << "saving shared files table...";
|
||||||
save_shared_files_visitor ssfv(first_file_inode, first_device_inode,
|
save_shared_files_visitor ssfv(first_file_inode, first_device_inode,
|
||||||
fs.num_unique());
|
fs.num_unique());
|
||||||
root->accept(ssfv);
|
root->accept(ssfv);
|
||||||
if (options_.pack_shared_files_table) {
|
mdb.set_shared_files_table(std::move(ssfv.get_shared_files()));
|
||||||
ssfv.pack_shared_files();
|
|
||||||
}
|
|
||||||
mv2.shared_files_table() = std::move(ssfv.get_shared_files());
|
|
||||||
|
|
||||||
thrift::metadata::fs_options fsopts;
|
|
||||||
fsopts.mtime_only() = !options_.keep_all_times;
|
|
||||||
if (options_.time_resolution_sec > 1) {
|
|
||||||
fsopts.time_resolution_sec() = options_.time_resolution_sec;
|
|
||||||
}
|
|
||||||
fsopts.packed_chunk_table() = options_.pack_chunk_table;
|
|
||||||
fsopts.packed_directories() = options_.pack_directories;
|
|
||||||
fsopts.packed_shared_files_table() = options_.pack_shared_files_table;
|
|
||||||
|
|
||||||
if (options_.plain_names_table) {
|
|
||||||
mv2.names() = ge_data.get_names();
|
|
||||||
} else {
|
|
||||||
auto ti = LOG_TIMED_INFO;
|
|
||||||
mv2.compact_names() = string_table::pack(
|
|
||||||
ge_data.get_names(), string_table::pack_options(
|
|
||||||
options_.pack_names, options_.pack_names_index,
|
|
||||||
options_.force_pack_string_tables));
|
|
||||||
ti << "saving names table...";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options_.plain_symlinks_table) {
|
|
||||||
mv2.symlinks() = ge_data.get_symlinks();
|
|
||||||
} else {
|
|
||||||
auto ti = LOG_TIMED_INFO;
|
|
||||||
mv2.compact_symlinks() = string_table::pack(
|
|
||||||
ge_data.get_symlinks(),
|
|
||||||
string_table::pack_options(options_.pack_symlinks,
|
|
||||||
options_.pack_symlinks_index,
|
|
||||||
options_.force_pack_string_tables));
|
|
||||||
ti << "saving symlinks table...";
|
|
||||||
}
|
|
||||||
|
|
||||||
mv2.uids() = ge_data.get_uids();
|
|
||||||
mv2.gids() = ge_data.get_gids();
|
|
||||||
mv2.modes() = ge_data.get_modes();
|
|
||||||
mv2.timestamp_base() = ge_data.get_timestamp_base();
|
|
||||||
mv2.block_size() = segmenter_factory_.get_block_size();
|
|
||||||
mv2.total_fs_size() = prog.original_size;
|
|
||||||
mv2.total_hardlink_size() = prog.hardlink_size;
|
|
||||||
mv2.options() = fsopts;
|
|
||||||
mv2.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID;
|
|
||||||
if (!options_.no_create_timestamp) {
|
|
||||||
mv2.create_timestamp() = std::time(nullptr);
|
|
||||||
}
|
|
||||||
mv2.preferred_path_separator() =
|
|
||||||
static_cast<uint32_t>(std::filesystem::path::preferred_separator);
|
|
||||||
|
|
||||||
if (auto catmgr = options_.inode.categorizer_mgr) {
|
if (auto catmgr = options_.inode.categorizer_mgr) {
|
||||||
std::unordered_map<fragment_category::value_type,
|
std::unordered_map<fragment_category::value_type,
|
||||||
@ -1097,13 +940,16 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
written_categories.begin(),
|
written_categories.begin(),
|
||||||
[&](auto const& cat) { return category_indices.at(cat); });
|
[&](auto const& cat) { return category_indices.at(cat); });
|
||||||
|
|
||||||
mv2.category_names() = std::move(category_names);
|
mdb.set_category_names(std::move(category_names));
|
||||||
mv2.block_categories() = std::move(written_categories);
|
mdb.set_block_categories(std::move(written_categories));
|
||||||
}
|
}
|
||||||
|
|
||||||
mv2.features() = features.get();
|
mdb.set_block_size(segmenter_factory_.get_block_size());
|
||||||
|
mdb.set_total_fs_size(prog.original_size);
|
||||||
|
mdb.set_total_hardlink_size(prog.hardlink_size);
|
||||||
|
mdb.gather_global_entry_data(ge_data);
|
||||||
|
|
||||||
auto [schema, data] = metadata_freezer::freeze(mv2);
|
auto [schema, data] = metadata_freezer::freeze(mdb.build());
|
||||||
|
|
||||||
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
||||||
|
|
||||||
|
@ -117,17 +117,17 @@ make_filesystem(::benchmark::State const* state,
|
|||||||
|
|
||||||
options.with_devices = true;
|
options.with_devices = true;
|
||||||
options.with_specials = true;
|
options.with_specials = true;
|
||||||
options.keep_all_times = false;
|
options.metadata.keep_all_times = false;
|
||||||
options.pack_chunk_table = true;
|
options.metadata.pack_chunk_table = true;
|
||||||
options.pack_directories = state ? state->range(0) : true;
|
options.metadata.pack_directories = state ? state->range(0) : true;
|
||||||
options.pack_shared_files_table = true;
|
options.metadata.pack_shared_files_table = true;
|
||||||
options.pack_names = state ? state->range(2) : true;
|
options.metadata.pack_names = state ? state->range(2) : true;
|
||||||
options.pack_names_index = state ? state->range(3) : true;
|
options.metadata.pack_names_index = state ? state->range(3) : true;
|
||||||
options.pack_symlinks = state ? state->range(2) : true;
|
options.metadata.pack_symlinks = state ? state->range(2) : true;
|
||||||
options.pack_symlinks_index = state ? state->range(3) : true;
|
options.metadata.pack_symlinks_index = state ? state->range(3) : true;
|
||||||
options.force_pack_string_tables = true;
|
options.metadata.force_pack_string_tables = true;
|
||||||
options.plain_names_table = state ? state->range(1) : false;
|
options.metadata.plain_names_table = state ? state->range(1) : false;
|
||||||
options.plain_symlinks_table = state ? state->range(1) : false;
|
options.metadata.plain_symlinks_table = state ? state->range(1) : false;
|
||||||
|
|
||||||
test::test_logger lgr;
|
test::test_logger lgr;
|
||||||
|
|
||||||
|
@ -156,28 +156,28 @@ void basic_end_to_end_test(
|
|||||||
options.with_devices = with_devices;
|
options.with_devices = with_devices;
|
||||||
options.with_specials = with_specials;
|
options.with_specials = with_specials;
|
||||||
options.inode.fragment_order.set_default(order_opts);
|
options.inode.fragment_order.set_default(order_opts);
|
||||||
options.keep_all_times = keep_all_times;
|
options.metadata.keep_all_times = keep_all_times;
|
||||||
options.pack_chunk_table = pack_chunk_table;
|
options.metadata.pack_chunk_table = pack_chunk_table;
|
||||||
options.pack_directories = pack_directories;
|
options.metadata.pack_directories = pack_directories;
|
||||||
options.pack_shared_files_table = pack_shared_files_table;
|
options.metadata.pack_shared_files_table = pack_shared_files_table;
|
||||||
options.pack_names = pack_names;
|
options.metadata.pack_names = pack_names;
|
||||||
options.pack_names_index = pack_names_index;
|
options.metadata.pack_names_index = pack_names_index;
|
||||||
options.pack_symlinks = pack_symlinks;
|
options.metadata.pack_symlinks = pack_symlinks;
|
||||||
options.pack_symlinks_index = pack_symlinks_index;
|
options.metadata.pack_symlinks_index = pack_symlinks_index;
|
||||||
options.force_pack_string_tables = true;
|
options.metadata.force_pack_string_tables = true;
|
||||||
options.plain_names_table = plain_names_table;
|
options.metadata.plain_names_table = plain_names_table;
|
||||||
options.plain_symlinks_table = plain_symlinks_table;
|
options.metadata.plain_symlinks_table = plain_symlinks_table;
|
||||||
|
|
||||||
if (set_uid) {
|
if (set_uid) {
|
||||||
options.uid = 0;
|
options.metadata.uid = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (set_gid) {
|
if (set_gid) {
|
||||||
options.gid = 0;
|
options.metadata.gid = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (set_time) {
|
if (set_time) {
|
||||||
options.timestamp = 4711;
|
options.metadata.timestamp = 4711;
|
||||||
}
|
}
|
||||||
|
|
||||||
test::test_logger lgr;
|
test::test_logger lgr;
|
||||||
@ -697,14 +697,14 @@ TEST_P(packing_test, regression_empty_fs) {
|
|||||||
cfg.blockhash_window_size = 8;
|
cfg.blockhash_window_size = 8;
|
||||||
cfg.block_size_bits = 10;
|
cfg.block_size_bits = 10;
|
||||||
|
|
||||||
options.pack_chunk_table = pack_chunk_table;
|
options.metadata.pack_chunk_table = pack_chunk_table;
|
||||||
options.pack_directories = pack_directories;
|
options.metadata.pack_directories = pack_directories;
|
||||||
options.pack_shared_files_table = pack_shared_files_table;
|
options.metadata.pack_shared_files_table = pack_shared_files_table;
|
||||||
options.pack_names = pack_names;
|
options.metadata.pack_names = pack_names;
|
||||||
options.pack_names_index = pack_names_index;
|
options.metadata.pack_names_index = pack_names_index;
|
||||||
options.pack_symlinks = pack_symlinks;
|
options.metadata.pack_symlinks = pack_symlinks;
|
||||||
options.pack_symlinks_index = pack_symlinks_index;
|
options.metadata.pack_symlinks_index = pack_symlinks_index;
|
||||||
options.force_pack_string_tables = true;
|
options.metadata.force_pack_string_tables = true;
|
||||||
|
|
||||||
test::test_logger lgr;
|
test::test_logger lgr;
|
||||||
|
|
||||||
@ -917,7 +917,7 @@ TEST_P(file_scanner, inode_ordering) {
|
|||||||
|
|
||||||
opts.file_hash_algorithm = file_hash_algo;
|
opts.file_hash_algorithm = file_hash_algo;
|
||||||
opts.inode.fragment_order.set_default(order_opts);
|
opts.inode.fragment_order.set_default(order_opts);
|
||||||
opts.no_create_timestamp = true;
|
opts.metadata.no_create_timestamp = true;
|
||||||
|
|
||||||
auto input = std::make_shared<test::os_access_mock>();
|
auto input = std::make_shared<test::os_access_mock>();
|
||||||
#if defined(DWARFS_TEST_RUNNING_ON_ASAN) || defined(DWARFS_TEST_RUNNING_ON_TSAN)
|
#if defined(DWARFS_TEST_RUNNING_ON_ASAN) || defined(DWARFS_TEST_RUNNING_ON_TSAN)
|
||||||
@ -1963,7 +1963,7 @@ TEST(filesystem, inode_size_cache) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
writer::scanner_options options;
|
writer::scanner_options options;
|
||||||
options.inode_size_cache_min_chunk_count = 32;
|
options.metadata.inode_size_cache_min_chunk_count = 32;
|
||||||
|
|
||||||
writer::segmenter::config cfg;
|
writer::segmenter::config cfg;
|
||||||
cfg.block_size_bits = 16;
|
cfg.block_size_bits = 16;
|
||||||
|
@ -652,13 +652,13 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
po::value<std::string>(&chmod_str),
|
po::value<std::string>(&chmod_str),
|
||||||
"recursively apply permission changes")
|
"recursively apply permission changes")
|
||||||
("no-create-timestamp",
|
("no-create-timestamp",
|
||||||
po::value<bool>(&options.no_create_timestamp)->zero_tokens(),
|
po::value<bool>(&options.metadata.no_create_timestamp)->zero_tokens(),
|
||||||
"don't add create timestamp to file system")
|
"don't add create timestamp to file system")
|
||||||
("set-time",
|
("set-time",
|
||||||
po::value<std::string>(&timestamp),
|
po::value<std::string>(&timestamp),
|
||||||
"set timestamp for whole file system (unixtime or 'now')")
|
"set timestamp for whole file system (unixtime or 'now')")
|
||||||
("keep-all-times",
|
("keep-all-times",
|
||||||
po::value<bool>(&options.keep_all_times)->zero_tokens(),
|
po::value<bool>(&options.metadata.keep_all_times)->zero_tokens(),
|
||||||
"save atime and ctime in addition to mtime")
|
"save atime and ctime in addition to mtime")
|
||||||
("time-resolution",
|
("time-resolution",
|
||||||
po::value<std::string>(&time_resolution)->default_value("sec"),
|
po::value<std::string>(&time_resolution)->default_value("sec"),
|
||||||
@ -1004,24 +1004,25 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (vm.contains("set-owner")) {
|
if (vm.contains("set-owner")) {
|
||||||
options.uid = uid;
|
options.metadata.uid = uid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vm.contains("set-group")) {
|
if (vm.contains("set-group")) {
|
||||||
options.gid = gid;
|
options.metadata.gid = gid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vm.contains("set-time")) {
|
if (vm.contains("set-time")) {
|
||||||
if (timestamp == "now") {
|
if (timestamp == "now") {
|
||||||
options.timestamp = std::time(nullptr);
|
options.metadata.timestamp = std::time(nullptr);
|
||||||
} else if (auto val = try_to<uint64_t>(timestamp)) {
|
} else if (auto val = try_to<uint64_t>(timestamp)) {
|
||||||
options.timestamp = val;
|
options.metadata.timestamp = val;
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
auto tp = parse_time_point(timestamp);
|
auto tp = parse_time_point(timestamp);
|
||||||
options.timestamp = std::chrono::duration_cast<std::chrono::seconds>(
|
options.metadata.timestamp =
|
||||||
tp.time_since_epoch())
|
std::chrono::duration_cast<std::chrono::seconds>(
|
||||||
.count();
|
tp.time_since_epoch())
|
||||||
|
.count();
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
iol.err << "error: " << e.what() << "\n";
|
iol.err << "error: " << e.what() << "\n";
|
||||||
return 1;
|
return 1;
|
||||||
@ -1031,10 +1032,10 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
|
|
||||||
if (auto it = time_resolutions.find(time_resolution);
|
if (auto it = time_resolutions.find(time_resolution);
|
||||||
it != time_resolutions.end()) {
|
it != time_resolutions.end()) {
|
||||||
options.time_resolution_sec = it->second;
|
options.metadata.time_resolution_sec = it->second;
|
||||||
} else if (auto val = try_to<uint32_t>(time_resolution)) {
|
} else if (auto val = try_to<uint32_t>(time_resolution)) {
|
||||||
options.time_resolution_sec = *val;
|
options.metadata.time_resolution_sec = *val;
|
||||||
if (options.time_resolution_sec == 0) {
|
if (options.metadata.time_resolution_sec == 0) {
|
||||||
iol.err << "error: the argument to '--time-resolution' must be nonzero\n";
|
iol.err << "error: the argument to '--time-resolution' must be nonzero\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -1046,45 +1047,45 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
|
|
||||||
if (!pack_metadata.empty() and pack_metadata != "none") {
|
if (!pack_metadata.empty() and pack_metadata != "none") {
|
||||||
if (pack_metadata == "auto") {
|
if (pack_metadata == "auto") {
|
||||||
options.force_pack_string_tables = false;
|
options.metadata.force_pack_string_tables = false;
|
||||||
options.pack_chunk_table = false;
|
options.metadata.pack_chunk_table = false;
|
||||||
options.pack_directories = false;
|
options.metadata.pack_directories = false;
|
||||||
options.pack_shared_files_table = false;
|
options.metadata.pack_shared_files_table = false;
|
||||||
options.pack_names = true;
|
options.metadata.pack_names = true;
|
||||||
options.pack_names_index = false;
|
options.metadata.pack_names_index = false;
|
||||||
options.pack_symlinks = true;
|
options.metadata.pack_symlinks = true;
|
||||||
options.pack_symlinks_index = false;
|
options.metadata.pack_symlinks_index = false;
|
||||||
} else {
|
} else {
|
||||||
auto pack_opts =
|
auto pack_opts =
|
||||||
split_to<std::vector<std::string_view>>(pack_metadata, ',');
|
split_to<std::vector<std::string_view>>(pack_metadata, ',');
|
||||||
for (auto const& opt : pack_opts) {
|
for (auto const& opt : pack_opts) {
|
||||||
if (opt == "chunk_table") {
|
if (opt == "chunk_table") {
|
||||||
options.pack_chunk_table = true;
|
options.metadata.pack_chunk_table = true;
|
||||||
} else if (opt == "directories") {
|
} else if (opt == "directories") {
|
||||||
options.pack_directories = true;
|
options.metadata.pack_directories = true;
|
||||||
} else if (opt == "shared_files") {
|
} else if (opt == "shared_files") {
|
||||||
options.pack_shared_files_table = true;
|
options.metadata.pack_shared_files_table = true;
|
||||||
} else if (opt == "names") {
|
} else if (opt == "names") {
|
||||||
options.pack_names = true;
|
options.metadata.pack_names = true;
|
||||||
} else if (opt == "names_index") {
|
} else if (opt == "names_index") {
|
||||||
options.pack_names_index = true;
|
options.metadata.pack_names_index = true;
|
||||||
} else if (opt == "symlinks") {
|
} else if (opt == "symlinks") {
|
||||||
options.pack_symlinks = true;
|
options.metadata.pack_symlinks = true;
|
||||||
} else if (opt == "symlinks_index") {
|
} else if (opt == "symlinks_index") {
|
||||||
options.pack_symlinks_index = true;
|
options.metadata.pack_symlinks_index = true;
|
||||||
} else if (opt == "force") {
|
} else if (opt == "force") {
|
||||||
options.force_pack_string_tables = true;
|
options.metadata.force_pack_string_tables = true;
|
||||||
} else if (opt == "plain") {
|
} else if (opt == "plain") {
|
||||||
options.plain_names_table = true;
|
options.metadata.plain_names_table = true;
|
||||||
options.plain_symlinks_table = true;
|
options.metadata.plain_symlinks_table = true;
|
||||||
} else if (opt == "all") {
|
} else if (opt == "all") {
|
||||||
options.pack_chunk_table = true;
|
options.metadata.pack_chunk_table = true;
|
||||||
options.pack_directories = true;
|
options.metadata.pack_directories = true;
|
||||||
options.pack_shared_files_table = true;
|
options.metadata.pack_shared_files_table = true;
|
||||||
options.pack_names = true;
|
options.metadata.pack_names = true;
|
||||||
options.pack_names_index = true;
|
options.metadata.pack_names_index = true;
|
||||||
options.pack_symlinks = true;
|
options.metadata.pack_symlinks = true;
|
||||||
options.pack_symlinks_index = true;
|
options.metadata.pack_symlinks_index = true;
|
||||||
} else {
|
} else {
|
||||||
iol.err << "error: the argument ('" << opt
|
iol.err << "error: the argument ('" << opt
|
||||||
<< "') to '--pack-metadata' is invalid\n";
|
<< "') to '--pack-metadata' is invalid\n";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user