mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-17 16:31:27 -04:00
feat(metadata): add block category information
This commit is contained in:
parent
6aba533688
commit
95eac672f8
@ -25,6 +25,7 @@
|
|||||||
#include <optional>
|
#include <optional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "dwarfs/fragment_category.h"
|
||||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -34,13 +35,17 @@ class block_manager {
|
|||||||
using chunk_type = thrift::metadata::chunk;
|
using chunk_type = thrift::metadata::chunk;
|
||||||
|
|
||||||
size_t get_logical_block() const;
|
size_t get_logical_block() const;
|
||||||
void set_written_block(size_t logical_block, size_t written_block);
|
void set_written_block(size_t logical_block, size_t written_block,
|
||||||
|
fragment_category::value_type category);
|
||||||
void map_logical_blocks(std::vector<chunk_type>& vec);
|
void map_logical_blocks(std::vector<chunk_type>& vec);
|
||||||
|
std::vector<fragment_category::value_type>
|
||||||
|
get_written_block_categories() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::mutex mutable mx_;
|
std::mutex mutable mx_;
|
||||||
size_t mutable num_blocks_{0};
|
size_t mutable num_blocks_{0};
|
||||||
std::vector<std::optional<size_t>> block_map_;
|
std::vector<std::optional<std::pair<size_t, fragment_category::value_type>>>
|
||||||
|
block_map_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace dwarfs
|
} // namespace dwarfs
|
||||||
|
@ -48,9 +48,8 @@ class segmenter {
|
|||||||
unsigned block_size_bits{22};
|
unsigned block_size_bits{22};
|
||||||
};
|
};
|
||||||
|
|
||||||
using block_ready_cb =
|
using block_ready_cb = folly::Function<void(std::shared_ptr<block_data>,
|
||||||
folly::Function<void(std::shared_ptr<block_data>,
|
size_t logical_block_num)>;
|
||||||
folly::Function<void(size_t)> physical_block_cb)>;
|
|
||||||
|
|
||||||
segmenter(logger& lgr, progress& prog, std::shared_ptr<block_manager> blkmgr,
|
segmenter(logger& lgr, progress& prog, std::shared_ptr<block_manager> blkmgr,
|
||||||
config const& cfg, compression_constraints const& cc,
|
config const& cfg, compression_constraints const& cc,
|
||||||
|
@ -35,13 +35,14 @@ size_t block_manager::get_logical_block() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void block_manager::set_written_block(size_t logical_block,
|
void block_manager::set_written_block(size_t logical_block,
|
||||||
size_t written_block) {
|
size_t written_block,
|
||||||
|
fragment_category::value_type category) {
|
||||||
std::lock_guard lock{mx_};
|
std::lock_guard lock{mx_};
|
||||||
assert(logical_block < num_blocks_);
|
assert(logical_block < num_blocks_);
|
||||||
if (block_map_.size() < num_blocks_) {
|
if (block_map_.size() < num_blocks_) {
|
||||||
block_map_.resize(num_blocks_);
|
block_map_.resize(num_blocks_);
|
||||||
}
|
}
|
||||||
block_map_[logical_block] = written_block;
|
block_map_[logical_block] = std::make_pair(written_block, category);
|
||||||
}
|
}
|
||||||
|
|
||||||
void block_manager::map_logical_blocks(std::vector<chunk_type>& vec) {
|
void block_manager::map_logical_blocks(std::vector<chunk_type>& vec) {
|
||||||
@ -49,8 +50,26 @@ void block_manager::map_logical_blocks(std::vector<chunk_type>& vec) {
|
|||||||
for (auto& c : vec) {
|
for (auto& c : vec) {
|
||||||
size_t block = c.get_block();
|
size_t block = c.get_block();
|
||||||
assert(block < num_blocks_);
|
assert(block < num_blocks_);
|
||||||
c.block() = block_map_[block].value();
|
c.block() = block_map_[block].value().first;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a table of block categories indexed by written block number.
//
// Each engaged entry of `block_map_` holds the pair stored by
// `set_written_block()`: `first` is the written block number and `second`
// is the category value. The returned vector has `num_blocks_` elements and
// element `first` is set to `second` for every entry.
//
// Throws `std::bad_optional_access` if any entry of `block_map_` is still
// empty, because `value()` is called on each entry unconditionally.
// NOTE(review): `mapping.first` is not range-checked against the size of
// `result`; presumably written block numbers are always below `num_blocks_`
// -- confirm against the callers of `set_written_block()`.
std::vector<fragment_category::value_type>
|
||||||
|
block_manager::get_written_block_categories() const {
|
||||||
|
std::vector<fragment_category::value_type> result;
|
||||||
|
|
||||||
|
{
|
||||||
|
// `mx_` is held for this inner scope only, covering the reads of
// `num_blocks_` and `block_map_`.
std::lock_guard lock{mx_};
|
||||||
|
|
||||||
|
// One slot per block; slots are value-initialized by `resize()`.
result.resize(num_blocks_);
|
||||||
|
|
||||||
|
for (auto& b : block_map_) {
|
||||||
|
// `value()` throws if this entry was never set.
auto& mapping = b.value();
|
||||||
|
// Store the category at the position of the written block number.
result[mapping.first] = mapping.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace dwarfs
|
} // namespace dwarfs
|
||||||
|
@ -700,9 +700,17 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
auto seg = segmenter_factory_->create(
|
auto seg = segmenter_factory_->create(
|
||||||
category, cat_size, cc, blockmgr,
|
category, cat_size, cc, blockmgr,
|
||||||
[category, meta, &fsw](auto block, auto physical_block_cb) {
|
[category, meta, blockmgr, &fsw](auto block,
|
||||||
fsw.write_block(category, std::move(block),
|
auto logical_block_num) {
|
||||||
std::move(physical_block_cb), meta);
|
fsw.write_block(
|
||||||
|
category, std::move(block),
|
||||||
|
[blockmgr, logical_block_num,
|
||||||
|
category](auto physical_block_num) {
|
||||||
|
blockmgr->set_written_block(logical_block_num,
|
||||||
|
physical_block_num,
|
||||||
|
category.value());
|
||||||
|
},
|
||||||
|
meta);
|
||||||
});
|
});
|
||||||
|
|
||||||
for (auto ino : span) {
|
for (auto ino : span) {
|
||||||
@ -862,6 +870,33 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
mv2.preferred_path_separator() =
|
mv2.preferred_path_separator() =
|
||||||
static_cast<uint32_t>(std::filesystem::path::preferred_separator);
|
static_cast<uint32_t>(std::filesystem::path::preferred_separator);
|
||||||
|
|
||||||
|
if (auto catmgr = options_.inode.categorizer_mgr) {
|
||||||
|
std::unordered_map<fragment_category::value_type,
|
||||||
|
fragment_category::value_type>
|
||||||
|
category_indices;
|
||||||
|
std::vector<std::string> category_names;
|
||||||
|
|
||||||
|
category_indices.reserve(frag_info.info.size());
|
||||||
|
category_names.reserve(frag_info.info.size());
|
||||||
|
|
||||||
|
for (auto const& ci : frag_info.info) {
|
||||||
|
auto [it, inserted] =
|
||||||
|
category_indices.emplace(ci.category, category_names.size());
|
||||||
|
if (inserted) {
|
||||||
|
category_names.emplace_back(catmgr->category_name(ci.category));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto written_categories = blockmgr->get_written_block_categories();
|
||||||
|
|
||||||
|
std::transform(written_categories.begin(), written_categories.end(),
|
||||||
|
written_categories.begin(),
|
||||||
|
[&](auto const& cat) { return category_indices.at(cat); });
|
||||||
|
|
||||||
|
mv2.category_names() = std::move(category_names);
|
||||||
|
mv2.block_categories() = std::move(written_categories);
|
||||||
|
}
|
||||||
|
|
||||||
auto [schema, data] = metadata_v2::freeze(mv2);
|
auto [schema, data] = metadata_v2::freeze(mv2);
|
||||||
|
|
||||||
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
||||||
|
@ -969,11 +969,7 @@ template <typename LoggerPolicy, typename SegmentingPolicy>
|
|||||||
void segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
void segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
||||||
auto& block = blocks_.back();
|
auto& block = blocks_.back();
|
||||||
block.finalize(stats_);
|
block.finalize(stats_);
|
||||||
block_ready_(block.data(), [blkmgr = blkmgr_,
|
block_ready_(block.data(), block.num());
|
||||||
logical_block_num =
|
|
||||||
block.num()](size_t physical_block_num) {
|
|
||||||
blkmgr->set_written_block(logical_block_num, physical_block_num);
|
|
||||||
});
|
|
||||||
++prog_.block_count;
|
++prog_.block_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,13 +144,14 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
|
|||||||
|
|
||||||
std::vector<std::shared_ptr<dwarfs::block_data>> written;
|
std::vector<std::shared_ptr<dwarfs::block_data>> written;
|
||||||
|
|
||||||
dwarfs::segmenter seg(lgr, prog, blkmgr, cfg, cc, total_size,
|
dwarfs::segmenter seg(
|
||||||
[&written](std::shared_ptr<dwarfs::block_data> blk,
|
lgr, prog, blkmgr, cfg, cc, total_size,
|
||||||
auto physical_block_cb) {
|
[&written, blkmgr](std::shared_ptr<dwarfs::block_data> blk,
|
||||||
size_t num = written.size();
|
auto logical_block_num) {
|
||||||
written.push_back(blk);
|
auto physical_block_num = written.size();
|
||||||
physical_block_cb(num);
|
written.push_back(blk);
|
||||||
});
|
blkmgr->set_written_block(logical_block_num, physical_block_num, 0);
|
||||||
|
});
|
||||||
|
|
||||||
suspender.dismiss();
|
suspender.dismiss();
|
||||||
|
|
||||||
|
@ -361,8 +361,27 @@ struct metadata {
|
|||||||
// fields added with dwarfs-0.7.3, file system version 2.5 //
|
// fields added with dwarfs-0.7.3, file system version 2.5 //
|
||||||
//=========================================================//
|
//=========================================================//
|
||||||
|
|
||||||
|
// We don't need to increment the file system minor version
|
||||||
|
// as file systems created with this new version are still
|
||||||
|
// readable by older binaries as long as they don't use any
|
||||||
|
// unsupported features (e.g. FLAC compression).
|
||||||
|
|
||||||
// The set of features used in this file system image. As long
|
// The set of features used in this file system image. As long
|
||||||
// as an older binary supports all features, it will be able
|
// as an older binary supports all features, it will be able
|
||||||
// to use images created with newer versions.
|
// to use images created with newer versions.
|
||||||
27: optional set<string> features
|
27: optional set<string> features
|
||||||
|
|
||||||
|
//=========================================================//
|
||||||
|
// fields added with dwarfs-0.8.0, file system version 2.5 //
|
||||||
|
//=========================================================//
|
||||||
|
|
||||||
|
// The set of categories used in this file system image. Used
|
||||||
|
// for display and for selecting compression algorithms when
|
||||||
|
// recompressing the image.
|
||||||
|
28: optional list<string> category_names
|
||||||
|
|
||||||
|
// The category of each block in the file system image. The
|
||||||
|
// index into this vector is the block number and the value
|
||||||
|
// is an index into `category_names`.
|
||||||
|
29: optional list<UInt32> block_categories
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user