mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-16 15:58:06 -04:00
feat(metadata): add block category information
This commit is contained in:
parent
6aba533688
commit
95eac672f8
@ -25,6 +25,7 @@
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/fragment_category.h"
|
||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -34,13 +35,17 @@ class block_manager {
|
||||
using chunk_type = thrift::metadata::chunk;
|
||||
|
||||
size_t get_logical_block() const;
|
||||
void set_written_block(size_t logical_block, size_t written_block);
|
||||
void set_written_block(size_t logical_block, size_t written_block,
|
||||
fragment_category::value_type category);
|
||||
void map_logical_blocks(std::vector<chunk_type>& vec);
|
||||
std::vector<fragment_category::value_type>
|
||||
get_written_block_categories() const;
|
||||
|
||||
private:
|
||||
std::mutex mutable mx_;
|
||||
size_t mutable num_blocks_{0};
|
||||
std::vector<std::optional<size_t>> block_map_;
|
||||
std::vector<std::optional<std::pair<size_t, fragment_category::value_type>>>
|
||||
block_map_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -48,9 +48,8 @@ class segmenter {
|
||||
unsigned block_size_bits{22};
|
||||
};
|
||||
|
||||
using block_ready_cb =
|
||||
folly::Function<void(std::shared_ptr<block_data>,
|
||||
folly::Function<void(size_t)> physical_block_cb)>;
|
||||
using block_ready_cb = folly::Function<void(std::shared_ptr<block_data>,
|
||||
size_t logical_block_num)>;
|
||||
|
||||
segmenter(logger& lgr, progress& prog, std::shared_ptr<block_manager> blkmgr,
|
||||
config const& cfg, compression_constraints const& cc,
|
||||
|
@ -35,13 +35,14 @@ size_t block_manager::get_logical_block() const {
|
||||
}
|
||||
|
||||
void block_manager::set_written_block(size_t logical_block,
|
||||
size_t written_block) {
|
||||
size_t written_block,
|
||||
fragment_category::value_type category) {
|
||||
std::lock_guard lock{mx_};
|
||||
assert(logical_block < num_blocks_);
|
||||
if (block_map_.size() < num_blocks_) {
|
||||
block_map_.resize(num_blocks_);
|
||||
}
|
||||
block_map_[logical_block] = written_block;
|
||||
block_map_[logical_block] = std::make_pair(written_block, category);
|
||||
}
|
||||
|
||||
void block_manager::map_logical_blocks(std::vector<chunk_type>& vec) {
|
||||
@ -49,8 +50,26 @@ void block_manager::map_logical_blocks(std::vector<chunk_type>& vec) {
|
||||
for (auto& c : vec) {
|
||||
size_t block = c.get_block();
|
||||
assert(block < num_blocks_);
|
||||
c.block() = block_map_[block].value();
|
||||
c.block() = block_map_[block].value().first;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<fragment_category::value_type>
|
||||
block_manager::get_written_block_categories() const {
|
||||
std::vector<fragment_category::value_type> result;
|
||||
|
||||
{
|
||||
std::lock_guard lock{mx_};
|
||||
|
||||
result.resize(num_blocks_);
|
||||
|
||||
for (auto& b : block_map_) {
|
||||
auto& mapping = b.value();
|
||||
result[mapping.first] = mapping.second;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -700,9 +700,17 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
auto seg = segmenter_factory_->create(
|
||||
category, cat_size, cc, blockmgr,
|
||||
[category, meta, &fsw](auto block, auto physical_block_cb) {
|
||||
fsw.write_block(category, std::move(block),
|
||||
std::move(physical_block_cb), meta);
|
||||
[category, meta, blockmgr, &fsw](auto block,
|
||||
auto logical_block_num) {
|
||||
fsw.write_block(
|
||||
category, std::move(block),
|
||||
[blockmgr, logical_block_num,
|
||||
category](auto physical_block_num) {
|
||||
blockmgr->set_written_block(logical_block_num,
|
||||
physical_block_num,
|
||||
category.value());
|
||||
},
|
||||
meta);
|
||||
});
|
||||
|
||||
for (auto ino : span) {
|
||||
@ -862,6 +870,33 @@ void scanner_<LoggerPolicy>::scan(
|
||||
mv2.preferred_path_separator() =
|
||||
static_cast<uint32_t>(std::filesystem::path::preferred_separator);
|
||||
|
||||
if (auto catmgr = options_.inode.categorizer_mgr) {
|
||||
std::unordered_map<fragment_category::value_type,
|
||||
fragment_category::value_type>
|
||||
category_indices;
|
||||
std::vector<std::string> category_names;
|
||||
|
||||
category_indices.reserve(frag_info.info.size());
|
||||
category_names.reserve(frag_info.info.size());
|
||||
|
||||
for (auto const& ci : frag_info.info) {
|
||||
auto [it, inserted] =
|
||||
category_indices.emplace(ci.category, category_names.size());
|
||||
if (inserted) {
|
||||
category_names.emplace_back(catmgr->category_name(ci.category));
|
||||
}
|
||||
}
|
||||
|
||||
auto written_categories = blockmgr->get_written_block_categories();
|
||||
|
||||
std::transform(written_categories.begin(), written_categories.end(),
|
||||
written_categories.begin(),
|
||||
[&](auto const& cat) { return category_indices.at(cat); });
|
||||
|
||||
mv2.category_names() = std::move(category_names);
|
||||
mv2.block_categories() = std::move(written_categories);
|
||||
}
|
||||
|
||||
auto [schema, data] = metadata_v2::freeze(mv2);
|
||||
|
||||
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
||||
|
@ -969,11 +969,7 @@ template <typename LoggerPolicy, typename SegmentingPolicy>
|
||||
void segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
||||
auto& block = blocks_.back();
|
||||
block.finalize(stats_);
|
||||
block_ready_(block.data(), [blkmgr = blkmgr_,
|
||||
logical_block_num =
|
||||
block.num()](size_t physical_block_num) {
|
||||
blkmgr->set_written_block(logical_block_num, physical_block_num);
|
||||
});
|
||||
block_ready_(block.data(), block.num());
|
||||
++prog_.block_count;
|
||||
}
|
||||
|
||||
|
@ -144,13 +144,14 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
|
||||
|
||||
std::vector<std::shared_ptr<dwarfs::block_data>> written;
|
||||
|
||||
dwarfs::segmenter seg(lgr, prog, blkmgr, cfg, cc, total_size,
|
||||
[&written](std::shared_ptr<dwarfs::block_data> blk,
|
||||
auto physical_block_cb) {
|
||||
size_t num = written.size();
|
||||
written.push_back(blk);
|
||||
physical_block_cb(num);
|
||||
});
|
||||
dwarfs::segmenter seg(
|
||||
lgr, prog, blkmgr, cfg, cc, total_size,
|
||||
[&written, blkmgr](std::shared_ptr<dwarfs::block_data> blk,
|
||||
auto logical_block_num) {
|
||||
auto physical_block_num = written.size();
|
||||
written.push_back(blk);
|
||||
blkmgr->set_written_block(logical_block_num, physical_block_num, 0);
|
||||
});
|
||||
|
||||
suspender.dismiss();
|
||||
|
||||
|
@ -361,8 +361,27 @@ struct metadata {
|
||||
// fields added with dwarfs-0.7.3, file system version 2.5 //
|
||||
//=========================================================//
|
||||
|
||||
// We don't need to increment the file system minor version
|
||||
// as file systems created with this new version are still
|
||||
// readable by older binaries as long as they don't use any
|
||||
// unsupported features (e.g. FLAC compression).
|
||||
|
||||
// The set of features used in this file system image. As long
|
||||
// as an older binary supports all features, it will be able
|
||||
// to use images created with newer versions.
|
||||
27: optional set<string> features
|
||||
|
||||
//=========================================================//
|
||||
// fields added with dwarfs-0.8.0, file system version 2.5 //
|
||||
//=========================================================//
|
||||
|
||||
// The set of categories used in this file system image. Used
|
||||
// for display and to select compression algorithms when
|
||||
// recompressing the image.
|
||||
28: optional list<string> category_names
|
||||
|
||||
// The category of each block in the file system image. The
|
||||
// index into this vector is the block number and the value
|
||||
// is an index into `category_names`.
|
||||
29: optional list<UInt32> block_categories
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user