mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-16 15:58:06 -04:00
Section index support for speeding up mount times (fixes #48)
This commit is contained in:
parent
d2ca60b320
commit
c103783d4b
@ -35,6 +35,8 @@ class mmif;
|
||||
class fs_section {
|
||||
public:
|
||||
fs_section(mmif& mm, size_t offset, int version);
|
||||
fs_section(std::shared_ptr<mmif> mm, section_type type, size_t offset,
|
||||
size_t size, int version);
|
||||
|
||||
size_t start() const { return impl_->start(); }
|
||||
size_t length() const { return impl_->length(); }
|
||||
|
@ -65,7 +65,7 @@ struct iovec_read_buf {
|
||||
};
|
||||
|
||||
constexpr uint8_t MAJOR_VERSION = 2;
|
||||
constexpr uint8_t MINOR_VERSION = 3;
|
||||
constexpr uint8_t MINOR_VERSION = 4;
|
||||
|
||||
enum class section_type : uint16_t {
|
||||
BLOCK = 0,
|
||||
@ -76,6 +76,9 @@ enum class section_type : uint16_t {
|
||||
|
||||
METADATA_V2 = 8,
|
||||
// Frozen metadata.
|
||||
|
||||
SECTION_INDEX = 9,
|
||||
// Section index.
|
||||
};
|
||||
|
||||
struct file_header {
|
||||
|
@ -57,6 +57,7 @@ struct filesystem_options {
|
||||
// Options consumed by the filesystem writer.
struct filesystem_writer_options {
  // Limit for the writer queue; defaults to 64 << 20 (presumably bytes, i.e.
  // 64 MiB -- TODO confirm the unit against the writer implementation).
  size_t max_queue_size = 64 << 20;

  // When set, any header present before the filesystem data is dropped.
  bool remove_header = false;

  // When set, no section index is appended when the writer is flushed.
  bool no_section_index = false;
};
|
||||
|
||||
struct inode_options {
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
@ -54,6 +55,9 @@ namespace dwarfs {
|
||||
namespace {
|
||||
|
||||
class filesystem_parser {
|
||||
private:
|
||||
static uint64_t constexpr section_offset_mask{(UINT64_C(1) << 48) - 1};
|
||||
|
||||
public:
|
||||
static off_t find_image_offset(mmif& mm, off_t image_offset) {
|
||||
if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
|
||||
@ -148,14 +152,31 @@ class filesystem_parser {
|
||||
major_ = fh->major;
|
||||
minor_ = fh->minor;
|
||||
|
||||
if (minor_ >= 4) {
|
||||
find_index();
|
||||
}
|
||||
|
||||
rewind();
|
||||
}
|
||||
|
||||
std::optional<fs_section> next_section() {
|
||||
if (offset_ < static_cast<off_t>(mm_->size())) {
|
||||
auto section = fs_section(*mm_, offset_, version_);
|
||||
offset_ = section.end();
|
||||
return section;
|
||||
if (index_.empty()) {
|
||||
if (offset_ < static_cast<off_t>(mm_->size())) {
|
||||
auto section = fs_section(*mm_, offset_, version_);
|
||||
offset_ = section.end();
|
||||
return section;
|
||||
}
|
||||
} else {
|
||||
if (offset_ < static_cast<off_t>(index_.size())) {
|
||||
uint64_t id = index_[offset_++];
|
||||
uint64_t offset = id & section_offset_mask;
|
||||
uint64_t next_offset = offset_ < static_cast<off_t>(index_.size())
|
||||
? index_[offset_] & section_offset_mask
|
||||
: mm_->size() - image_offset_;
|
||||
return fs_section(mm_, static_cast<section_type>(id >> 48),
|
||||
image_offset_ + offset, next_offset - offset,
|
||||
version_);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
@ -169,7 +190,14 @@ class filesystem_parser {
|
||||
}
|
||||
|
||||
void rewind() {
|
||||
offset_ = image_offset_ + (version_ == 1 ? sizeof(file_header) : 0);
|
||||
if (index_.empty()) {
|
||||
offset_ = image_offset_;
|
||||
if (version_ == 1) {
|
||||
offset_ += sizeof(file_header);
|
||||
}
|
||||
} else {
|
||||
offset_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::string version() const {
|
||||
@ -180,13 +208,38 @@ class filesystem_parser {
|
||||
|
||||
// True when the detected section version is 2 or newer.
bool has_checksums() const {
  return !(version_ < 2);
}
|
||||
|
||||
// True when find_index() loaded at least one section index entry.
bool has_index() const {
  return index_.size() > 0;
}
|
||||
|
||||
private:
|
||||
void find_index() {
|
||||
uint64_t index_pos;
|
||||
|
||||
::memcpy(&index_pos, mm_->as<void>(mm_->size() - sizeof(uint64_t)),
|
||||
sizeof(uint64_t));
|
||||
|
||||
if ((index_pos >> 48) ==
|
||||
static_cast<uint16_t>(section_type::SECTION_INDEX)) {
|
||||
index_pos &= section_offset_mask;
|
||||
index_pos += image_offset_;
|
||||
|
||||
if (index_pos < mm_->size()) {
|
||||
auto section = fs_section(*mm_, index_pos, version_);
|
||||
|
||||
if (section.check_fast(*mm_)) {
|
||||
index_.resize(section.length() / sizeof(uint64_t));
|
||||
::memcpy(index_.data(), section.data(*mm_).data(), section.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<mmif> mm_;
|
||||
off_t const image_offset_;
|
||||
off_t offset_{0};
|
||||
int version_{0};
|
||||
uint8_t major_{0};
|
||||
uint8_t minor_{0};
|
||||
std::vector<uint64_t> index_;
|
||||
};
|
||||
|
||||
using section_map = std::unordered_map<section_type, fs_section>;
|
||||
@ -299,36 +352,68 @@ class filesystem_ final : public filesystem_v2::impl {
|
||||
void set_num_workers(size_t num) override { ir_.set_num_workers(num); }
|
||||
|
||||
private:
|
||||
filesystem_info const& get_info() const;
|
||||
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
std::shared_ptr<mmif> mm_;
|
||||
metadata_v2 meta_;
|
||||
inode_reader_v2 ir_;
|
||||
mutable std::mutex mx_;
|
||||
mutable filesystem_parser parser_;
|
||||
std::vector<uint8_t> meta_buffer_;
|
||||
std::optional<folly::ByteRange> header_;
|
||||
filesystem_info fsinfo_;
|
||||
mutable std::unique_ptr<filesystem_info const> fsinfo_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
filesystem_info const& filesystem_<LoggerPolicy>::get_info() const {
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
if (!fsinfo_) {
|
||||
filesystem_info info;
|
||||
|
||||
parser_.rewind();
|
||||
|
||||
while (auto s = parser_.next_section()) {
|
||||
if (s->type() == section_type::BLOCK) {
|
||||
++info.block_count;
|
||||
info.compressed_block_size += s->length();
|
||||
info.uncompressed_block_size += get_uncompressed_section_size(mm_, *s);
|
||||
} else if (s->type() == section_type::METADATA_V2) {
|
||||
info.compressed_metadata_size += s->length();
|
||||
info.uncompressed_metadata_size +=
|
||||
get_uncompressed_section_size(mm_, *s);
|
||||
}
|
||||
}
|
||||
|
||||
fsinfo_ = std::make_unique<filesystem_info>(info);
|
||||
}
|
||||
|
||||
return *fsinfo_;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
const filesystem_options& options,
|
||||
int inode_offset)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, mm_(std::move(mm)) {
|
||||
filesystem_parser parser(mm_, options.image_offset);
|
||||
, mm_(std::move(mm))
|
||||
, parser_(mm_, options.image_offset) {
|
||||
block_cache cache(lgr, mm_, options.block_cache);
|
||||
|
||||
header_ = parser.header();
|
||||
if (parser_.has_index()) {
|
||||
LOG_DEBUG << "found valid section index";
|
||||
}
|
||||
|
||||
header_ = parser_.header();
|
||||
|
||||
section_map sections;
|
||||
|
||||
while (auto s = parser.next_section()) {
|
||||
LOG_DEBUG << "section " << s->description() << " @ " << s->start() << " ["
|
||||
while (auto s = parser_.next_section()) {
|
||||
LOG_DEBUG << "section " << s->name() << " @ " << s->start() << " ["
|
||||
<< s->length() << " bytes]";
|
||||
if (s->type() == section_type::BLOCK) {
|
||||
cache.insert(*s);
|
||||
++fsinfo_.block_count;
|
||||
fsinfo_.compressed_block_size += s->length();
|
||||
fsinfo_.uncompressed_block_size += get_uncompressed_section_size(mm_, *s);
|
||||
} else {
|
||||
if (!s->check_fast(*mm_)) {
|
||||
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
|
||||
@ -337,12 +422,6 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
if (!sections.emplace(s->type(), *s).second) {
|
||||
DWARFS_THROW(runtime_error, "duplicate section: " + s->name());
|
||||
}
|
||||
|
||||
if (s->type() == section_type::METADATA_V2) {
|
||||
fsinfo_.compressed_metadata_size += s->length();
|
||||
fsinfo_.uncompressed_metadata_size +=
|
||||
get_uncompressed_section_size(mm_, *s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,7 +429,7 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
|
||||
meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
|
||||
options.metadata, inode_offset, false,
|
||||
options.lock_mode, !parser.has_checksums());
|
||||
options.lock_mode, !parser_.has_checksums());
|
||||
|
||||
LOG_DEBUG << "read " << cache.block_count() << " blocks and " << meta_.size()
|
||||
<< " bytes of metadata";
|
||||
@ -362,7 +441,7 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
|
||||
meta_.dump(os, detail_level, fsinfo_,
|
||||
meta_.dump(os, detail_level, get_info(),
|
||||
[&](const std::string& indent, uint32_t inode) {
|
||||
if (auto chunks = meta_.get_chunks(inode)) {
|
||||
os << indent << chunks->size() << " chunks in inode " << inode
|
||||
@ -539,7 +618,7 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
||||
prog.filesystem_size += s->length();
|
||||
if (s->type() == section_type::BLOCK) {
|
||||
++prog.block_count;
|
||||
} else {
|
||||
} else if (s->type() != section_type::SECTION_INDEX) {
|
||||
if (!sections.emplace(s->type(), *s).second) {
|
||||
DWARFS_THROW(runtime_error, "duplicate section: " + s->name());
|
||||
}
|
||||
|
@ -255,6 +255,8 @@ class filesystem_writer_ final : public filesystem_writer::impl {
|
||||
void write(const T& obj);
|
||||
void write(folly::ByteRange range);
|
||||
void writer_thread();
|
||||
void push_section_index(section_type type);
|
||||
void write_section_index();
|
||||
size_t mem_used() const;
|
||||
|
||||
std::ostream& os_;
|
||||
@ -272,6 +274,8 @@ class filesystem_writer_ final : public filesystem_writer::impl {
|
||||
volatile bool flush_;
|
||||
std::thread writer_thread_;
|
||||
uint32_t section_number_{0};
|
||||
std::vector<uint64_t> section_index_;
|
||||
std::ostream::pos_type header_size_{0};
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -296,6 +300,7 @@ filesystem_writer_<LoggerPolicy>::filesystem_writer_(
|
||||
LOG_WARN << "header will not be written because remove_header is set";
|
||||
} else {
|
||||
os_ << header_->rdbuf();
|
||||
header_size_ = os_.tellp();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -378,6 +383,8 @@ void filesystem_writer_<LoggerPolicy>::write(folly::ByteRange range) {
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write(fsblock const& fsb) {
|
||||
push_section_index(fsb.type());
|
||||
|
||||
write(fsb.header());
|
||||
write(fsb.data());
|
||||
|
||||
@ -434,6 +441,7 @@ void filesystem_writer_<LoggerPolicy>::copy_header(folly::ByteRange header) {
|
||||
LOG_WARN << "replacing old header";
|
||||
} else {
|
||||
write(header);
|
||||
header_size_ = os_.tellp();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -471,6 +479,32 @@ void filesystem_writer_<LoggerPolicy>::flush() {
|
||||
cond_.notify_one();
|
||||
|
||||
writer_thread_.join();
|
||||
|
||||
if (!options_.no_section_index) {
|
||||
write_section_index();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::push_section_index(section_type type) {
|
||||
section_index_.push_back((static_cast<uint64_t>(type) << 48) |
|
||||
static_cast<uint64_t>(os_.tellp() - header_size_));
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_section_index() {
|
||||
push_section_index(section_type::SECTION_INDEX);
|
||||
auto data =
|
||||
folly::ByteRange(reinterpret_cast<uint8_t*>(section_index_.data()),
|
||||
sizeof(section_index_[0]) * section_index_.size());
|
||||
|
||||
auto fsb = fsblock(section_type::SECTION_INDEX, compression_type::NONE, data,
|
||||
section_number_++);
|
||||
|
||||
fsb.compress(wg_);
|
||||
fsb.wait_until_compressed();
|
||||
|
||||
write(fsb);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <mutex>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@ -143,6 +144,41 @@ class fs_section_v2 : public fs_section::impl {
|
||||
section_header_v2 hdr_;
|
||||
};
|
||||
|
||||
class fs_section_v2_lazy : public fs_section::impl {
|
||||
public:
|
||||
fs_section_v2_lazy(std::shared_ptr<mmif> mm, section_type type, size_t offset,
|
||||
size_t size);
|
||||
|
||||
size_t start() const override { return offset_ + sizeof(section_header_v2); }
|
||||
size_t length() const override { return size_ - sizeof(section_header_v2); }
|
||||
|
||||
compression_type compression() const override {
|
||||
return section().compression();
|
||||
}
|
||||
|
||||
section_type type() const override { return type_; }
|
||||
|
||||
std::string name() const override { return get_section_name(type_); }
|
||||
|
||||
std::string description() const override { return section().description(); }
|
||||
|
||||
bool check_fast(mmif& mm) const override { return section().check_fast(mm); }
|
||||
|
||||
bool verify(mmif& mm) const override { return section().verify(mm); }
|
||||
|
||||
folly::ByteRange data(mmif& mm) const override { return section().data(mm); }
|
||||
|
||||
private:
|
||||
fs_section::impl const& section() const;
|
||||
|
||||
std::mutex mutable mx_;
|
||||
std::unique_ptr<fs_section::impl const> mutable sec_;
|
||||
std::shared_ptr<mmif> mutable mm_;
|
||||
section_type type_;
|
||||
size_t offset_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
fs_section::fs_section(mmif& mm, size_t offset, int version) {
|
||||
switch (version) {
|
||||
case 1:
|
||||
@ -160,6 +196,21 @@ fs_section::fs_section(mmif& mm, size_t offset, int version) {
|
||||
}
|
||||
}
|
||||
|
||||
// Construct a section from externally supplied type, offset and size.
//
// Only section version 2 is supported by this overload; any other version is
// reported through DWARFS_THROW.
fs_section::fs_section(std::shared_ptr<mmif> mm, section_type type,
                       size_t offset, size_t size, int version) {
  if (version == 2) {
    impl_ =
        std::make_shared<fs_section_v2_lazy>(std::move(mm), type, offset, size);
  } else {
    DWARFS_THROW(runtime_error,
                 fmt::format("unsupported section version {} [lazy]", version));
  }
}
|
||||
|
||||
fs_section_v1::fs_section_v1(mmif& mm, size_t offset) {
|
||||
read_section_header_common(hdr_, start_, mm, offset);
|
||||
check_section(*this);
|
||||
@ -170,4 +221,23 @@ fs_section_v2::fs_section_v2(mmif& mm, size_t offset) {
|
||||
check_section(*this);
|
||||
}
|
||||
|
||||
fs_section_v2_lazy::fs_section_v2_lazy(std::shared_ptr<mmif> mm,
|
||||
section_type type, size_t offset,
|
||||
size_t size)
|
||||
: mm_{std::move(mm)}
|
||||
, type_{type}
|
||||
, offset_{offset}
|
||||
, size_{size} {}
|
||||
|
||||
// Return the fully parsed section, creating it on the first call.
//
// Creation happens with mx_ held. Once the fs_section_v2 exists, the
// reference to the mapping kept in mm_ is released.
fs_section::impl const& fs_section_v2_lazy::section() const {
  std::lock_guard lock(mx_);

  if (sec_) {
    return *sec_;
  }

  sec_ = std::make_unique<fs_section_v2>(*mm_, offset_);
  mm_.reset();

  return *sec_;
}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -39,6 +39,7 @@ const std::map<section_type, std::string_view> sections{
|
||||
SECTION_TYPE_(BLOCK),
|
||||
SECTION_TYPE_(METADATA_V2_SCHEMA),
|
||||
SECTION_TYPE_(METADATA_V2),
|
||||
SECTION_TYPE_(SECTION_INDEX),
|
||||
#undef SECTION_TYPE_
|
||||
};
|
||||
|
||||
|
@ -334,7 +334,7 @@ int mkdwarfs(int argc, char** argv) {
|
||||
schema_compression, metadata_compression, log_level_str, timestamp,
|
||||
time_resolution, order, progress_mode, recompress_opts, pack_metadata;
|
||||
size_t num_workers;
|
||||
bool no_progress = false, remove_header = false;
|
||||
bool no_progress = false, remove_header = false, no_section_index = false;
|
||||
unsigned level;
|
||||
uint16_t uid, gid;
|
||||
|
||||
@ -439,6 +439,9 @@ int mkdwarfs(int argc, char** argv) {
|
||||
po::value<bool>(&remove_header)->zero_tokens(),
|
||||
"remove any header present before filesystem data"
|
||||
" (use with --recompress)")
|
||||
("no-section-index",
|
||||
po::value<bool>(&no_section_index)->zero_tokens(),
|
||||
"don't add section index to file system")
|
||||
("log-level",
|
||||
po::value<std::string>(&log_level_str)->default_value("info"),
|
||||
"log level (error, warn, info, debug, trace)")
|
||||
@ -803,6 +806,7 @@ int mkdwarfs(int argc, char** argv) {
|
||||
filesystem_writer_options fswopts;
|
||||
fswopts.max_queue_size = mem_limit;
|
||||
fswopts.remove_header = remove_header;
|
||||
fswopts.no_section_index = no_section_index;
|
||||
|
||||
std::unique_ptr<std::ifstream> header_ifs;
|
||||
|
||||
|
@ -1202,7 +1202,27 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
auto mm = std::make_shared<test::mmap_mock>(rewritten4.str());
|
||||
EXPECT_NO_THROW(filesystem_v2::identify(lgr, mm, idss));
|
||||
EXPECT_FALSE(filesystem_v2::header(mm))
|
||||
<< folly::hexDump(rewritten3.str().data(), rewritten3.str().size());
|
||||
<< folly::hexDump(rewritten4.str().data(), rewritten4.str().size());
|
||||
filesystem_v2 fs(lgr, mm);
|
||||
check_dynamic(version, fs);
|
||||
}
|
||||
|
||||
std::ostringstream rewritten5;
|
||||
|
||||
{
|
||||
filesystem_writer_options fsw_opts;
|
||||
fsw_opts.no_section_index = true;
|
||||
filesystem_writer fsw(rewritten5, lgr, wg, prog, bc, fsw_opts);
|
||||
filesystem_v2::rewrite(lgr, prog,
|
||||
std::make_shared<test::mmap_mock>(rewritten4.str()),
|
||||
fsw, opts);
|
||||
}
|
||||
|
||||
{
|
||||
auto mm = std::make_shared<test::mmap_mock>(rewritten5.str());
|
||||
EXPECT_NO_THROW(filesystem_v2::identify(lgr, mm, idss));
|
||||
EXPECT_FALSE(filesystem_v2::header(mm))
|
||||
<< folly::hexDump(rewritten5.str().data(), rewritten5.str().size());
|
||||
filesystem_v2 fs(lgr, mm);
|
||||
check_dynamic(version, fs);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user