First take at metadata v2

This commit is contained in:
Marcus Holland-Moritz 2020-11-26 20:56:28 +01:00
parent 5ac0fe1399
commit f373144b73
20 changed files with 1251 additions and 42 deletions

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
[submodule "folly"]
path = folly
url = https://github.com/facebook/folly
[submodule "fbthrift"]
path = fbthrift
url = https://github.com/facebook/fbthrift/

View File

@ -49,7 +49,12 @@ pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3)
pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4)
pkg_check_modules(LIBZSTD IMPORTED_TARGET libzstd>=1.3.8)
# Cache switch defined before fbthrift is added; going by its description it
# limits that sub-build to the thrift compiler (presumably read by fbthrift's
# own CMakeLists.txt -- confirm there).
set(compiler_only
ON
CACHE BOOL "only build thrift compiler")
# Vendored submodules. EXCLUDE_FROM_ALL keeps their targets out of the default
# "all" target, so only what this project depends on is built.
add_subdirectory(folly EXCLUDE_FROM_ALL)
add_subdirectory(fbthrift EXCLUDE_FROM_ALL)
if(WITH_TESTS)
# Download and unpack googletest at configure time
@ -99,6 +104,7 @@ list(
src/dwarfs/inode_reader.cpp
src/dwarfs/logger.cpp
src/dwarfs/metadata.cpp
src/dwarfs/metadata_v2.cpp
src/dwarfs/metadata_writer.cpp
src/dwarfs/mmap.cpp
src/dwarfs/options.cpp
@ -132,11 +138,92 @@ if(WITH_TESTS)
gtest_discover_tests(dwarfs_test)
endif()
# Generate the C++ sources for the dwarfs metadata schema (frozen2 layouts
# included) and for fbthrift's own frozen.thrift schema.
#
# Both .thrift files are first copied into a "thrift/" tree below the binary
# directory and the compiler is run from inside that tree, so the generated
# gen-cpp2 directories end up next to the copies.
#
# All file operations go through `cmake -E` instead of mkdir/cp/cd so that the
# rule does not depend on a POSIX shell, and every path is absolute so that it
# does not depend on the working directory the generator happens to use.
add_custom_command(
  OUTPUT
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_constants.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_constants.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_for_each_field.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_metadata.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_metadata.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.tcc
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_custom_protocol.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_union.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visitation.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_data.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_data.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.tcc
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_constants.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_constants.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_metadata.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_metadata.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_visitation.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_for_each_field.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_visit_union.h
  # Stage frozen.thrift from the fbthrift submodule.
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift
  COMMAND
    ${CMAKE_COMMAND} -E copy
    ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/thrift/frozen.thrift
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/frozen.thrift
  # Stage the project's own schema.
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
  COMMAND
    ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/thrift/metadata.thrift
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/metadata.thrift
  # Run the compiler from inside each staging directory (`-E chdir` replaces
  # the shell-only "cd <dir> && <cmd>" form, which VERBATIM would break).
  COMMAND
    ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
    ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 --gen mstch_cpp2:frozen2
    metadata.thrift
  COMMAND
    ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift
    ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 --gen mstch_cpp2 frozen.thrift
  # Every input of the rule is listed, including frozen.thrift, so that a
  # submodule update regenerates the frozen_* sources as well.
  DEPENDS
    ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1
    ${CMAKE_CURRENT_SOURCE_DIR}/thrift/metadata.thrift
    ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/thrift/frozen.thrift
  COMMENT "Generating thrift sources for metadata.thrift and frozen.thrift"
  VERBATIM)
# Include search path shared by thrift_light, metadata_thrift and the dwarfs
# targets: the folly/fbthrift submodule sources, folly's build directory, the
# "thrift/" staging tree holding the generated gen-cpp2 headers, and the
# top-level binary directory.
list(
APPEND
INCLUDE_DIRS
${CMAKE_CURRENT_BINARY_DIR}/folly
${CMAKE_CURRENT_BINARY_DIR}/thrift
${CMAKE_CURRENT_SOURCE_DIR}/folly
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift
${CMAKE_CURRENT_BINARY_DIR})
# Hand-picked fbthrift runtime sources (protocols, varint helpers, frozen
# support) plus the generated frozen_types.cpp, built as a small library so the
# full fbthrift build is not needed.
add_library(
  thrift_light
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/CompactProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/BinaryProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/DebugProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/JSONProtocolCommon.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp/protocol/TProtocolException.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp/util/VarintUtils.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/gen/module_types_cpp.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/Frozen.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/FrozenUtil.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/schema/MemorySchema.cpp
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.cpp)
set_property(TARGET thrift_light PROPERTY CXX_STANDARD 17)
target_include_directories(thrift_light PRIVATE ${INCLUDE_DIRS})

# Generated code for the dwarfs metadata schema.
add_library(
  metadata_thrift
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.cpp
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp)
set_property(TARGET metadata_thrift PROPERTY CXX_STANDARD 17)
target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS})

# Both libraries compile files produced by the same add_custom_command. CMake
# attaches that rule to every target listing one of its outputs, so two
# unordered targets may run the generator concurrently in a parallel build and
# trample each other's files. Ordering the targets makes the rule run once
# (while building thrift_light) before metadata_thrift is considered.
add_dependencies(metadata_thrift thrift_light)
foreach(tgt dwarfs ${BINARY_TARGETS})
target_include_directories(
${tgt} SYSTEM
PRIVATE ${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/folly
${CMAKE_CURRENT_SOURCE_DIR}/folly)
target_include_directories(${tgt} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}
${INCLUDE_DIRS})
target_include_directories(${tgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
@ -150,9 +237,11 @@ foreach(tgt dwarfs ${BINARY_TARGETS})
target_compile_options(${tgt} PRIVATE -Wall -Wextra -pedantic)
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD 20)
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD 17)
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET ${tgt} PROPERTY CXX_EXTENSIONS OFF)
add_dependencies(${tgt} metadata_thrift)
endforeach()
target_compile_definitions(dwarfs-bin PRIVATE FUSE_USE_VERSION=35
@ -162,6 +251,8 @@ foreach(tgt ${BINARY_TARGETS})
target_link_libraries(
${tgt}
dwarfs
metadata_thrift
thrift_light
folly
${Boost_LIBRARIES}
PkgConfig::LIBLZ4

1
fbthrift Submodule

@ -0,0 +1 @@
Subproject commit 42536064c10726c50ce07a0ffd0910c17d8781da

View File

@ -23,17 +23,95 @@
#include <array>
#include <functional>
#include <limits>
#include <memory>
#include <string_view>
#include <unordered_map>
#include <vector>
#include <sys/stat.h>
#include "file_interface.h"
#include "fstypes.h"
#include "dwarfs/file_interface.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
namespace dwarfs {
/**
 * Accumulates the values that entries have in common (uids, gids, modes,
 * names, link targets and the smallest timestamp) and hands out compact
 * indices / offsets for them.
 *
 * Usage is two-phase: feed values through the add_*() functions, call
 * index() once to number names and links, then query with the get_*()
 * functions.
 */
struct global_entry_data {
  // uid / gid / mode values are numbered in order of first appearance.
  void add_uid(uint16_t uid) { add(uid, uids, next_uid_index); }

  void add_gid(uint16_t gid) { add(gid, gids, next_gid_index); }

  void add_mode(uint16_t mode) { add(mode, modes, next_mode_index); }

  // Registers `val` in `map` under `next_index` and bumps the counter; a
  // value that is already present keeps its index and the counter is left
  // untouched.
  void add(uint16_t val, std::unordered_map<uint16_t, uint16_t>& map,
           uint16_t& next_index) {
    if (map.count(val) != 0) {
      return;
    }
    map.emplace(val, next_index);
    ++next_index;
  }

  // Tracks the minimum of all timestamps seen so far.
  void add_time(uint64_t time) {
    timestamp_base = time < timestamp_base ? time : timestamp_base;
  }

  // Names and links are only collected here (with a placeholder index of 0);
  // their real indices are assigned by index().
  void add_name(std::string const& name) { names.try_emplace(name, 0); }

  void add_link(std::string const& link) { links.try_emplace(link, 0); }

  // Assigns the final indices for both string tables.
  void index() {
    index(names);
    index(links);
  }

  void index(std::unordered_map<std::string, uint32_t>& map);

  // Lookups; each throws std::out_of_range (via at()) for a value that was
  // never added.
  uint16_t get_uid_index(uint16_t uid) const {
    return uids.at(uid);
  }

  uint16_t get_gid_index(uint16_t gid) const {
    return gids.at(gid);
  }

  uint16_t get_mode_index(uint16_t mode) const {
    return modes.at(mode);
  }

  uint32_t get_name_index(std::string const& name) const {
    return names.at(name);
  }

  uint32_t get_link_index(std::string const& link) const {
    return links.at(link);
  }

  // Timestamp expressed relative to the smallest timestamp added.
  uint64_t get_time_offset(uint64_t time) const {
    return time - timestamp_base;
  }

  std::vector<uint16_t> get_uids() const;

  std::vector<uint16_t> get_gids() const;

  std::vector<uint16_t> get_modes() const;

  std::vector<std::string> get_names() const;

  std::vector<std::string> get_links() const;

  // TODO: make private

  template <typename T, typename U>
  std::vector<T> get_vector(std::unordered_map<T, U> const& map) const;

  std::unordered_map<uint16_t, uint16_t> uids;
  std::unordered_map<uint16_t, uint16_t> gids;
  std::unordered_map<uint16_t, uint16_t> modes;
  std::unordered_map<std::string, uint32_t> names;
  std::unordered_map<std::string, uint32_t> links;
  uint16_t next_uid_index{0};
  uint16_t next_gid_index{0};
  uint16_t next_mode_index{0};
  // Starts at the maximum so that the first add_time() always lowers it.
  uint64_t timestamp_base{std::numeric_limits<uint64_t>::max()};
};
class file;
class link;
class dir;
@ -72,6 +150,9 @@ class entry : public file_interface {
void pack(dir_entry& de) const;
void pack(dir_entry_ug& de) const;
void pack(dir_entry_ug_time& de) const;
// Fills a v2 (thrift) entry from this entry, translating values into the
// indices / offsets handed out by `data`.
void
pack(thrift::metadata::entry& entry_v2, global_entry_data const& data) const;
// Registers this entry's uid, gid, mode and timestamps with `data`.
void update(global_entry_data& data) const;
virtual void accept(entry_visitor& v, bool preorder = false) = 0;
virtual uint32_t inode_num() const = 0;
@ -130,8 +211,12 @@ class dir : public entry {
pack(uint8_t* buf,
std::function<void(const entry* e, size_t offset)> const& offset_cb)
const = 0;
// v2 counterpart of the buffer-based pack(): writes this directory into the
// thrift metadata structure `mv2`, using `data` for value lookups.
virtual void pack(thrift::metadata::metadata& mv2,
global_entry_data const& data) const = 0;
virtual size_t packed_entry_size() const = 0;
virtual void pack_entry(uint8_t* buf) const = 0;
// v2 counterpart of pack_entry(uint8_t*): writes the entry describing this
// directory itself into `mv2`.
virtual void pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const = 0;
uint32_t inode_num() const override { return inode_; }
protected:

View File

@ -86,6 +86,8 @@ class filesystem {
void dump(std::ostream& os) const { impl_->dump(os); }
// Writes a dump based on the v2 metadata to `os`; forwards to the
// implementation object.
void dump_v2(std::ostream& os) const { impl_->dump_v2(os); }
void walk(std::function<void(const dir_entry*)> const& func) {
impl_->walk(func);
}
@ -143,6 +145,7 @@ class filesystem {
virtual ~impl() = default;
virtual void dump(std::ostream& os) const = 0;
virtual void dump_v2(std::ostream& os) const = 0;
virtual void
walk(std::function<void(const dir_entry*)> const& func) const = 0;
virtual const dir_entry* find(const char* path) const = 0;

View File

@ -54,6 +54,10 @@ class filesystem_writer {
progress& prog, const block_compressor& bc,
size_t max_queue_size);
filesystem_writer(std::ostream& os, logger& lgr, worker_group& wg,
progress& prog, const block_compressor& bc,
const block_compressor& metadata_bc, size_t max_queue_size);
// section create_block();
// section create_metadata();
@ -67,6 +71,10 @@ class filesystem_writer {
impl_->write_metadata(std::move(data));
}
// Hands a serialized v2 metadata blob to the implementation for writing;
// `data` is moved from.
void write_metadata_v2(std::vector<uint8_t>&& data) {
impl_->write_metadata_v2(std::move(data));
}
void flush() { impl_->flush(); }
size_t size() const { return impl_->size(); }
@ -77,6 +85,7 @@ class filesystem_writer {
virtual void write_block(std::vector<uint8_t>&& data) = 0;
virtual void write_metadata(std::vector<uint8_t>&& data) = 0;
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
virtual void flush() = 0;
virtual size_t size() const = 0;
};

View File

@ -106,6 +106,9 @@ enum class section_type : uint16_t {
// the block size which is needed for working with the
// chunk lists. Also defines inode offsets being used
// and the total inode count (for out-of-bounds checks).
METADATA_V2 = 7,
// Metadata v2: the complete filesystem metadata stored as a single
// section in Thrift "frozen" format.
};
enum class dir_entry_type : uint8_t {

View File

@ -26,6 +26,10 @@
namespace dwarfs {
namespace thrift::metadata {
struct chunk;
}
class file;
class file_interface;
@ -38,5 +42,7 @@ class inode : public file_interface {
virtual const file_interface* any() const = 0; // TODO
virtual void add_chunk(size_t block, size_t offset, size_t size) = 0;
virtual const std::vector<chunk_type>& chunks() const = 0;
// Appends this inode's chunks, converted to the thrift chunk type, to `vec`.
virtual void
append_chunks(std::vector<thrift::metadata::chunk>& vec) const = 0;
};
} // namespace dwarfs

View File

@ -0,0 +1,147 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <vector>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include "fstypes.h"
#include "logger.h"
namespace dwarfs {
/**
 * Handle for filesystem metadata stored in the v2 (thrift) format.
 *
 * All work is delegated to a polymorphic `impl`. A default-constructed
 * object owns no implementation; it only exists so that it can be a class
 * member and be move-assigned later. Most of the v1 metadata interface is
 * still disabled (`#if 0`) until it has been ported.
 */
class metadata_v2 {
 public:
  // Creates an empty handle (no implementation attached).
  metadata_v2() = default;

  // Takes ownership of the raw section contents in `data`. (Defined out of
  // line; `defaults` is forwarded to the implementation.)
  metadata_v2(logger& lgr, std::vector<uint8_t>&& data,
              const struct ::stat* defaults);

  metadata_v2& operator=(metadata_v2&&) = default;

  /**
   * Dumps the metadata to `os`; `icb` is passed through to the
   * implementation's dump() as the per-inode callback.
   *
   * Does nothing on an empty handle. Without this guard, dumping a
   * default-constructed object (e.g. a filesystem image that carries no v2
   * metadata section) would dereference a null pointer.
   */
  void
  dump(std::ostream& os,
       std::function<void(const std::string&, uint32_t)> const& icb) const {
    if (!impl_) {
      return;
    }
    impl_->dump(os, icb);
  }

#if 0
  static void get_stat_defaults(struct ::stat* defaults);
  size_t size() const { return impl_->size(); }
  bool empty() const { return !impl_ || impl_->empty(); }
  size_t block_size() const { return impl_->block_size(); }
  unsigned block_size_bits() const { return impl_->block_size_bits(); }
  void walk(std::function<void(const dir_entry*)> const& func) const {
    impl_->walk(func);
  }
  const dir_entry* find(const char* path) const { return impl_->find(path); }
  const dir_entry* find(int inode) const { return impl_->find(inode); }
  const dir_entry* find(int inode, const char* name) const {
    return impl_->find(inode, name);
  }
  int getattr(const dir_entry* de, struct ::stat* stbuf) const {
    return impl_->getattr(de, stbuf);
  }
  int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const {
    return impl_->access(de, mode, uid, gid);
  }
  const directory* opendir(const dir_entry* de) const {
    return impl_->opendir(de);
  }
  const dir_entry*
  readdir(const directory* d, size_t offset, std::string* name) const {
    return impl_->readdir(d, offset, name);
  }
  size_t dirsize(const directory* d) const { return impl_->dirsize(d); }
  int readlink(const dir_entry* de, char* buf, size_t size) const {
    return impl_->readlink(de, buf, size);
  }
  int readlink(const dir_entry* de, std::string* buf) const {
    return impl_->readlink(de, buf);
  }
  int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); }
  int open(const dir_entry* de) const { return impl_->open(de); }
  const chunk_type* get_chunks(int inode, size_t& num) const {
    return impl_->get_chunks(inode, num);
  }
#endif

  // Interface every concrete metadata implementation has to provide.
  class impl {
   public:
    virtual ~impl() = default;

    virtual void dump(
        std::ostream& os,
        std::function<void(const std::string&, uint32_t)> const& icb) const = 0;

#if 0
    virtual size_t size() const = 0;
    virtual bool empty() const = 0;
    virtual size_t block_size() const = 0;
    virtual unsigned block_size_bits() const = 0;
    virtual void
    walk(std::function<void(const dir_entry*)> const& func) const = 0;
    virtual const dir_entry* find(const char* path) const = 0;
    virtual const dir_entry* find(int inode) const = 0;
    virtual const dir_entry* find(int inode, const char* name) const = 0;
    virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0;
    virtual int
    access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0;
    virtual const directory* opendir(const dir_entry* de) const = 0;
    virtual const dir_entry*
    readdir(const directory* d, size_t offset, std::string* name) const = 0;
    virtual size_t dirsize(const directory* d) const = 0;
    virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0;
    virtual int readlink(const dir_entry* de, std::string* buf) const = 0;
    virtual int statvfs(struct ::statvfs* stbuf) const = 0;
    virtual int open(const dir_entry* de) const = 0;
    virtual const chunk_type* get_chunks(int inode, size_t& num) const = 0;
#endif
  };

 private:
  std::unique_ptr<impl> impl_; // null for a default-constructed handle
};
} // namespace dwarfs

View File

@ -28,6 +28,7 @@
#include <unistd.h>
#include <folly/Conv.h>
#include <folly/gen/Base.h>
#include <openssl/sha.h>
@ -39,6 +40,41 @@
namespace dwarfs {
// Returns the keys of `map`, ordered by the value each key maps to (i.e. by
// assigned index). The relative order of keys sharing the same value is not
// defined by this function.
template <typename T, typename U>
std::vector<T>
global_entry_data::get_vector(std::unordered_map<T, U> const& map) const {
  using namespace folly::gen;

  // Work on a copy of the (key, index) pairs so they can be sorted.
  std::vector<std::pair<T, U>> keyed(map.begin(), map.end());
  auto const by_index = [](auto const& kv) { return kv.second; };

  return from(keyed) | orderBy(by_index) | get<0>() | as<std::vector>();
}
// The accessors below all return the keys of the corresponding table in
// index order, as produced by get_vector().
std::vector<uint16_t> global_entry_data::get_uids() const {
return get_vector(uids);
}
std::vector<uint16_t> global_entry_data::get_gids() const {
return get_vector(gids);
}
std::vector<uint16_t> global_entry_data::get_modes() const {
return get_vector(modes);
}
// For names and links the indices are only meaningful once index() has run;
// before that every entry still carries the placeholder value from add_*().
std::vector<std::string> global_entry_data::get_names() const {
return get_vector(names);
}
std::vector<std::string> global_entry_data::get_links() const {
return get_vector(links);
}
// Numbers the keys of `map` consecutively from 0, following the order that
// folly's `order` puts them in (presumably ascending -- confirm against
// folly/gen).
// Only existing keys are written through map[s], so no elements are inserted.
// NOTE(review): this assumes `order` has collected all keys before the first
// assignment happens -- confirm against folly/gen.
void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
using namespace folly::gen;
uint32_t ix = 0;
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
}
template <typename DirEntryType>
class dir_ : public dir {
public:
@ -51,6 +87,13 @@ class dir_ : public dir {
entry::pack(*de);
}
// Appends the entry describing this directory itself to mv2.entries and
// records its position in the inode index.
void pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const override {
// The index slot must already exist (at() throws otherwise); it is set to
// the position the new entry is about to occupy.
mv2.inode_index.at(inode_num()) = mv2.entries.size();
mv2.entries.emplace_back();
entry::pack(mv2.entries.back(), data);
}
size_t packed_size() const override {
return offsetof(directory, u) + sizeof(DirEntryType) * entries_.size();
}
@ -74,6 +117,23 @@ class dir_ : public dir {
++de;
}
}
// Appends this directory's record to mv2.directories, followed by one entry
// per direct child in mv2.entries. The children end up contiguous, starting
// at `first_entry`, and each child's position is stored in the inode index.
void pack(thrift::metadata::metadata& mv2,
          global_entry_data const& data) const override {
  thrift::metadata::directory dir;
  dir.self_inode = inode_num();
  // inode_num() is virtual on entry, so the parent's inode is read through
  // the base pointer. This avoids dereferencing the unchecked result of a
  // dynamic_pointer_cast to this exact dir_ instantiation. A directory
  // without a parent (the root) reports 0.
  dir.parent_inode = has_parent() ? parent()->inode_num() : 0;
  dir.first_entry = mv2.entries.size();
  dir.entry_count = entries_.size();
  mv2.directories.push_back(dir);

  for (entry_ptr const& e : entries_) {
    // The index slot must already exist (at() throws otherwise).
    mv2.inode_index.at(e->inode_num()) = mv2.entries.size();
    mv2.entries.emplace_back();
    e->pack(mv2.entries.back(), data);
  }
}
};
entry::entry(const std::string& name, std::shared_ptr<entry> parent,
@ -156,6 +216,27 @@ void entry::pack(dir_entry_ug_time& de) const {
pack(de.ug);
}
// Feeds this entry's owner, group, mode and all three timestamps into the
// shared tables of `data`.
// NOTE(review): add_uid()/add_gid() take uint16_t, so ids that do not fit
// into 16 bits would be truncated here -- confirm whether that is intended.
void entry::update(global_entry_data& data) const {
data.add_uid(stat_.st_uid);
data.add_gid(stat_.st_gid);
// Only the low 16 bits of the mode are kept.
data.add_mode(stat_.st_mode & 0xFFFF);
data.add_time(stat_.st_atime);
data.add_time(stat_.st_mtime);
data.add_time(stat_.st_ctime);
}
// Fills `entry_v2` from this entry. Mode, owner and group are stored as
// indices into the tables of `data`, timestamps as offsets from its
// timestamp base. The lookups expect update() to have been called for this
// entry (and its name to have been added) beforehand.
void entry::pack(thrift::metadata::entry& entry_v2,
global_entry_data const& data) const {
// An entry without a parent gets name index 0 without a table lookup.
entry_v2.name_index = has_parent() ? data.get_name_index(name_) : 0;
entry_v2.mode = data.get_mode_index(stat_.st_mode & 0xFFFF);
entry_v2.owner = data.get_uid_index(stat_.st_uid);
entry_v2.group = data.get_gid_index(stat_.st_gid);
entry_v2.atime = data.get_time_offset(stat_.st_atime);
entry_v2.mtime = data.get_time_offset(stat_.st_mtime);
entry_v2.ctime = data.get_time_offset(stat_.st_ctime);
entry_v2.inode = inode_num();
}
entry::type_t file::type() const { return E_FILE; }
std::string_view file::hash() const {

View File

@ -31,6 +31,7 @@
#include "dwarfs/fstypes.h"
#include "dwarfs/inode_reader.h"
#include "dwarfs/metadata.h"
#include "dwarfs/metadata_v2.h"
#include "dwarfs/progress.h"
namespace dwarfs {
@ -102,6 +103,7 @@ class filesystem_ : public filesystem::impl {
const struct ::stat* stat_defaults, int inode_offset);
void dump(std::ostream& os) const override;
void dump_v2(std::ostream& os) const override;
void walk(std::function<void(const dir_entry*)> const& func) const override;
const dir_entry* find(const char* path) const override;
const dir_entry* find(int inode) const override;
@ -126,6 +128,7 @@ class filesystem_ : public filesystem::impl {
log_proxy<LoggerPolicy> log_;
std::shared_ptr<mmif> mm_;
metadata meta_;
metadata_v2 meta_v2_;
inode_reader ir_;
};
@ -156,6 +159,15 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
stat_defaults, inode_offset);
break;
case section_type::METADATA_V2:
// TODO: handle in-place uncompressed metadata
meta_v2_ =
metadata_v2(lgr,
block_decompressor::decompress(
sh.compression, mm_->as<uint8_t>(start), sh.length),
stat_defaults);
break;
default:
throw std::runtime_error("unknown section");
}
@ -184,6 +196,17 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os) const {
});
}
// Dumps the v2 metadata to `os`. For every inode passed to the callback, the
// chunk list is printed as well; the chunks are still looked up in the v1
// metadata (see TODO).
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::dump_v2(std::ostream& os) const {
  auto const print_inode_chunks = [&](const std::string& indent,
                                      uint32_t inode) {
    size_t num = 0;
    const chunk_type* chunk = meta_.get_chunks(inode, num); // TODO
    os << indent << num << " chunks in inode " << inode << "\n";
    ir_.dump(os, indent + " ", chunk, num);
  };

  meta_v2_.dump(os, print_inode_chunks);
}
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::walk(
std::function<void(const dir_entry*)> const& func) const {
@ -325,6 +348,10 @@ void filesystem::rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
writer.write_metadata(std::move(meta_raw));
break;
case section_type::METADATA_V2:
// TODO...
break;
default:
throw std::runtime_error("unknown section");
}

View File

@ -86,20 +86,22 @@ class fsblock {
};
public:
fsblock(section_type type, std::vector<uint8_t>&& data)
fsblock(section_type type, const block_compressor& bc,
std::vector<uint8_t>&& data)
: type_(type)
, bc_(bc)
, uncompressed_size_(data.size())
, state_(std::make_shared<state>(std::move(data))) {}
template <typename LogProxy>
void compress(worker_group& wg, const block_compressor& bc, LogProxy& lp) {
void compress(worker_group& wg, LogProxy& lp) {
lp.trace() << "block queued for compression";
std::shared_ptr<state> s = state_;
wg.add_job([&, bc, s] {
wg.add_job([&, s] {
lp.trace() << "block compression started";
s->compress(bc, lp);
s->compress(bc_, lp);
});
}
@ -107,6 +109,8 @@ class fsblock {
section_type type() const { return type_; }
compression_type compression() const { return bc_.type(); }
const std::vector<uint8_t>& data() const {
return state_->data();
;
@ -118,6 +122,7 @@ class fsblock {
private:
const section_type type_;
block_compressor const& bc_;
const size_t uncompressed_size_;
std::shared_ptr<state> state_;
};
@ -127,17 +132,21 @@ class filesystem_writer_ : public filesystem_writer::impl {
public:
filesystem_writer_(logger& lgr, std::ostream& os, worker_group& wg,
progress& prog, const block_compressor& bc,
const block_compressor& metadata_bc,
size_t max_queue_size);
~filesystem_writer_() noexcept;
void write_block(std::vector<uint8_t>&& data) override;
void write_metadata(std::vector<uint8_t>&& data) override;
void write_metadata_v2(std::vector<uint8_t>&& data) override;
void flush() override;
size_t size() const override { return os_.tellp(); }
private:
void write_section(section_type type, std::vector<uint8_t>&& data);
void write(section_type type, const std::vector<uint8_t>& data);
void write_section(section_type type, std::vector<uint8_t>&& data,
block_compressor const& bc);
void write(section_type type, compression_type compression,
const std::vector<uint8_t>& data);
void write(const char* data, size_t size);
template <typename T>
void write(const T& obj);
@ -150,6 +159,7 @@ class filesystem_writer_ : public filesystem_writer::impl {
worker_group& wg_;
progress& prog_;
const block_compressor& bc_;
const block_compressor& metadata_bc_;
const size_t max_queue_size_;
log_proxy<LoggerPolicy> log_;
std::deque<std::unique_ptr<fsblock>> queue_;
@ -162,11 +172,13 @@ class filesystem_writer_ : public filesystem_writer::impl {
template <typename LoggerPolicy>
filesystem_writer_<LoggerPolicy>::filesystem_writer_(
logger& lgr, std::ostream& os, worker_group& wg, progress& prog,
const block_compressor& bc, size_t max_queue_size)
const block_compressor& bc, const block_compressor& metadata_bc,
size_t max_queue_size)
: os_(os)
, wg_(wg)
, prog_(prog)
, bc_(bc)
, metadata_bc_(metadata_bc)
, max_queue_size_(max_queue_size)
, log_(lgr)
, flush_(false)
@ -219,7 +231,7 @@ void filesystem_writer_<LoggerPolicy>::writer_thread() {
<< size_with_unit(fsb->uncompressed_size()) << " to "
<< size_with_unit(fsb->size());
write(fsb->type(), fsb->data());
write(fsb->type(), fsb->compression(), fsb->data());
}
}
@ -263,10 +275,11 @@ void filesystem_writer_<LoggerPolicy>::write_file_header() {
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write(section_type type,
compression_type compression,
const std::vector<uint8_t>& data) {
section_header sh;
sh.type = type;
sh.compression = bc_.type();
sh.compression = compression;
sh.unused = 0;
sh.length = data.size();
write(sh);
@ -279,7 +292,8 @@ void filesystem_writer_<LoggerPolicy>::write(section_type type,
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_section(
section_type type, std::vector<uint8_t>&& data) {
section_type type, std::vector<uint8_t>&& data,
block_compressor const& bc) {
{
std::unique_lock<std::mutex> lock(mx_);
@ -288,9 +302,9 @@ void filesystem_writer_<LoggerPolicy>::write_section(
}
}
auto fsb = std::make_unique<fsblock>(type, std::move(data));
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data));
fsb->compress(wg_, bc_, log_);
fsb->compress(wg_, log_);
{
std::lock_guard<std::mutex> lock(mx_);
@ -303,13 +317,19 @@ void filesystem_writer_<LoggerPolicy>::write_section(
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_block(
std::vector<uint8_t>&& data) {
write_section(section_type::BLOCK, std::move(data));
write_section(section_type::BLOCK, std::move(data), bc_);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata(
std::vector<uint8_t>&& data) {
write_section(section_type::METADATA, std::move(data));
write_section(section_type::METADATA, std::move(data), metadata_bc_);
}
// Queues `data` as a METADATA_V2 section, compressed with the metadata
// compressor rather than the block compressor.
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2(
std::vector<uint8_t>&& data) {
write_section(section_type::METADATA_V2, std::move(data), metadata_bc_);
}
template <typename LoggerPolicy>
@ -333,7 +353,15 @@ filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
worker_group& wg, progress& prog,
const block_compressor& bc,
size_t max_queue_size)
: filesystem_writer(os, lgr, wg, prog, bc, bc, max_queue_size) {}
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
worker_group& wg, progress& prog,
const block_compressor& bc,
const block_compressor& metadata_bc,
size_t max_queue_size)
: impl_(
make_unique_logging_object<impl, filesystem_writer_, logger_policies>(
lgr, os, wg, prog, bc, max_queue_size)) {}
lgr, os, wg, prog, bc, metadata_bc, max_queue_size)) {}
} // namespace dwarfs

View File

@ -39,7 +39,8 @@ const std::map<section_type, std::string> sections{
SECTION_TYPE_(META_INODE_INDEX),
SECTION_TYPE_(META_CHUNK_INDEX),
SECTION_TYPE_(META_DIRECTORIES),
SECTION_TYPE_(META_CONFIG)
SECTION_TYPE_(META_CONFIG),
SECTION_TYPE_(METADATA_V2),
#undef SECTION_TYPE_
};

View File

@ -28,6 +28,8 @@
#include "dwarfs/inode_manager.h"
#include "dwarfs/script.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
namespace dwarfs {
template <unsigned BlockSizeBits = 24>
@ -76,6 +78,17 @@ class inode_manager_ : public inode_manager {
const std::vector<chunk_type>& chunks() const override { return chunks_; }
// Converts each chunk of this inode to the thrift chunk type and appends it
// to `vec`, keeping the order of chunks_.
void
append_chunks(std::vector<thrift::metadata::chunk>& vec) const override {
for (auto c : chunks_) {
thrift::metadata::chunk chnk;
// block / offset / size are extracted through the `access` helpers
// (presumably unpacking the packed chunk_type -- confirm in its definition).
chnk.block = access::block(c);
chnk.offset = access::offset(c);
chnk.size = access::size(c);
vec.push_back(chnk);
}
}
private:
uint32_t num_{std::numeric_limits<uint32_t>::max()};
file const* file_{nullptr};

537
src/dwarfs/metadata_v2.cpp Normal file
View File

@ -0,0 +1,537 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <cstring>
#include <unistd.h>
#include "dwarfs/metadata_v2.h"
#include "dwarfs/gen-cpp2/metadata_layouts.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
#include <thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h>
namespace dwarfs {
// TODO: merge this into the metadata implementation behind interface
template <typename LoggerPolicy>
class metadata_v2_ : public metadata_v2::impl {
public:
template <typename T>
using view = typename ::apache::thrift::frozen::View<T>;
using entry_view = view<thrift::metadata::entry>;
using directory_view = view<thrift::metadata::directory>;
metadata_v2_(logger& lgr, std::vector<uint8_t>&& meta,
const struct ::stat* /*defaults*/)
: data_(std::move(meta))
, meta_(::apache::thrift::frozen::mapFrozen<thrift::metadata::metadata>(
data_))
, root_(meta_.entries()[meta_.inode_index()[0]])
, inode_offset_(meta_.chunk_index_offset())
, log_(lgr) {
// TODO: defaults?
log_.debug() << ::apache::thrift::debugString(meta_.thaw());
::apache::thrift::frozen::Layout<thrift::metadata::metadata> layout;
::apache::thrift::frozen::schema::Schema schema;
folly::ByteRange range(data_);
apache::thrift::CompactSerializer::deserialize(range, schema);
log_.debug() << ::apache::thrift::debugString(schema);
}
void dump(std::ostream& os,
std::function<void(const std::string&, uint32_t)> const& icb)
const override;
#if 0
size_t size() const override { return data_.size(); }
bool empty() const override { return data_.empty(); }
size_t block_size() const override {
return static_cast<size_t>(1) << cfg_->block_size_bits;
}
unsigned block_size_bits() const override { return cfg_->block_size_bits; }
void walk(std::function<void(const dir_entry*)> const& func) const override;
const dir_entry* find(const char* path) const override;
const dir_entry* find(int inode) const override;
const dir_entry* find(int inode, const char* name) const override;
int getattr(const dir_entry* de, struct ::stat* stbuf) const override;
int access(const dir_entry* de, int mode, uid_t uid,
gid_t gid) const override;
const directory* opendir(const dir_entry* de) const override;
const dir_entry*
readdir(const directory* d, size_t offset, std::string* name) const override;
size_t dirsize(const directory* d) const override {
return d->count + 2; // adds '.' and '..', which we fake in ;-)
}
int readlink(const dir_entry* de, char* buf, size_t size) const override;
int readlink(const dir_entry* de, std::string* buf) const override;
int statvfs(struct ::statvfs* stbuf) const override;
int open(const dir_entry* de) const override;
const chunk_type* get_chunks(int inode, size_t& num) const override;
#endif
private:
// Dumps a single entry (one line prefixed with `indent`); for directories
// this recurses into the directory's entries.
void dump(std::ostream& os, const std::string& indent, entry_view entry,
std::function<void(const std::string&, uint32_t)> const& icb) const;
// Dumps the entry count of `dir` followed by each of its entries.
void dump(std::ostream& os, const std::string& indent, directory_view dir,
std::function<void(const std::string&, uint32_t)> const& icb) const;
// Renders `mode` as a 13-character string: setuid/setgid/sticky flags, one
// type character ('d', 'l' or '-') and the user/group/other rwx triplets.
std::string modestring(uint16_t mode) const;
size_t reg_filesize(uint32_t inode) const {
uint32_t cur = meta_.chunk_index()[inode];
uint32_t end = meta_.chunk_index()[inode + 1];
size_t size = 0;
while (cur < end) {
size += meta_.chunks()[cur++].size();
}
return size;
}
// Returns the size reported for `entry`, whose resolved mode bits are `mode`:
//   - regular file: sum of its chunk sizes (see reg_filesize())
//   - symlink:      length of the stored link target
//   - anything else: 0
size_t filesize(entry_view entry, uint16_t mode) const {
  if (S_ISREG(mode)) {
    return reg_filesize(entry.inode());
  }

  if (S_ISLNK(mode)) {
    // For symlinks, dir_link_index maps the inode to a slot in the links
    // table.
    auto const link_slot = meta_.dir_link_index()[entry.inode()];
    return meta_.links()[link_slot].size();
  }

  return 0;
}
#if 0
void walk(const dir_entry* de,
std::function<void(const dir_entry*)> const& func) const;
std::string name(const dir_entry* de) const {
return std::string(as<char>(de->name_offset), de->name_size);
}
size_t linksize(const dir_entry* de) const {
return *as<uint16_t>(de->u.offset);
}
std::string linkname(const dir_entry* de) const {
size_t offs = de->u.offset;
return std::string(as<char>(offs + sizeof(uint16_t)), *as<uint16_t>(offs));
}
const char* linkptr(const dir_entry* de) const {
return as<char>(de->u.offset + sizeof(uint16_t));
}
const directory* getdir(const dir_entry* de) const {
return as<directory>(de->u.offset);
}
template <typename T>
const T* as(size_t offset = 0) const {
return reinterpret_cast<const T*>(
reinterpret_cast<const char*>(data_.data()) + offset);
}
const dir_entry* get_entry(int inode) const {
inode -= inode_offset_;
return inode >= 0 && inode < static_cast<int>(cfg_->inode_count)
? as<dir_entry>(inode_index_[inode])
: nullptr;
}
void parse(const struct ::stat* defaults);
const uint32_t* chunk_index_ = nullptr;
const uint32_t* inode_index_ = nullptr;
const dir_entry* root_ = nullptr;
const meta_config* cfg_ = nullptr;
std::shared_ptr<dir_reader> dir_reader_;
#endif
// NOTE: declaration order matters here. Members are initialized in
// declaration order, and the constructor maps `meta_` over `data_` and then
// derives `root_` and `inode_offset_` from `meta_`.

// Raw metadata bytes that `meta_` is mapped over.
std::vector<uint8_t> data_;
// Frozen (mapped) view of the metadata stored in `data_`.
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata> meta_;
// Root entry: entries()[inode_index()[0]].
entry_view root_;
// Copy of chunk_index_offset(); subtracted from an inode number before it is
// used to index chunk_index().
const int inode_offset_;
log_proxy<LoggerPolicy> log_;
};
// Writes a one-line description of `entry` to `os`, prefixed with `indent`:
// the inode number, the mode string and (for every inode other than 0) the
// entry name, followed by type-specific details:
//   - regular file: its [first, last] chunk index range and total size
//   - directory:    the directory index, then a recursive dump of its entries
//   - symlink:      the link target
//
// `icb` is only passed down the recursion; it is not invoked here.
template <typename LoggerPolicy>
void metadata_v2_<LoggerPolicy>::dump(
    std::ostream& os, const std::string& indent, entry_view entry,
    std::function<void(const std::string&, uint32_t)> const& icb) const {
  auto mode = meta_.modes()[entry.mode()];
  auto inode = entry.inode();

  os << indent << "<inode:" << inode << "> " << modestring(mode);

  if (inode > 0) {
    os << " " << meta_.names()[entry.name_index()];
  }

  if (S_ISREG(mode)) {
    // The chunk index is addressed relative to inode_offset_.
    uint32_t cur = meta_.chunk_index()[inode - inode_offset_];
    uint32_t end = meta_.chunk_index()[inode - inode_offset_ + 1];
    os << " [" << cur << ", " << end << "]";

    size_t size = 0;
    while (cur < end) {
      size += meta_.chunks()[cur++].size();
    }
    os << " " << size << "\n";
    // TODO: invoke `icb` for this inode once the per-inode callback has been
    //       ported to the v2 metadata.
  } else if (S_ISDIR(mode)) {
    auto dir_index = meta_.dir_link_index()[inode];
    os << " => "
       << "<dir:" << dir_index << ">"
       << "\n";
    // `icb` is a const reference, so it is passed on as-is; wrapping it in
    // std::move() would not move anything.
    dump(os, indent + " ", meta_.directories()[dir_index], icb);
  } else if (S_ISLNK(mode)) {
    os << " -> " << meta_.links()[meta_.dir_link_index()[inode]] << "\n";
  } else {
    os << " (unknown type)\n";
  }
}
// Writes the number of entries in `dir`, then dumps each of them in order.
// The directory's entries occupy the contiguous range of `entries()` that
// starts at first_entry() and is entry_count() long.
template <typename LoggerPolicy>
void metadata_v2_<LoggerPolicy>::dump(
    std::ostream& os, const std::string& indent, directory_view dir,
    std::function<void(const std::string&, uint32_t)> const& icb) const {
  auto num_entries = dir.entry_count();
  auto base = dir.first_entry();

  os << indent << "(" << num_entries << ") entries\n";

  size_t pos = 0;
  while (pos < num_entries) {
    dump(os, indent, meta_.entries()[base + pos], icb);
    ++pos;
  }
}
// Public entry point of the dump: starts at the root entry with an empty
// indentation prefix.
template <typename LoggerPolicy>
void metadata_v2_<LoggerPolicy>::dump(
    std::ostream& os,
    std::function<void(const std::string&, uint32_t)> const& icb) const {
  const std::string no_indent;
  dump(os, no_indent, root_, icb);
}
// Formats `mode` as a fixed-width, 13-character string:
//   [0..2]  'U' / 'G' / 'S' if the setuid / setgid / sticky bit is set
//   [3]     'd' for directories, 'l' for symlinks, '-' otherwise
//   [4..12] rwx permission triplets for user, group and others
// Every flag that is not set is rendered as '-'.
template <typename LoggerPolicy>
std::string metadata_v2_<LoggerPolicy>::modestring(uint16_t mode) const {
  std::string out;
  out.reserve(13);

  // Appends `ch` if any bit of `mask` is set in `mode`, '-' otherwise.
  auto put_flag = [&out, mode](auto mask, char ch) {
    out.push_back(mode & mask ? ch : '-');
  };

  put_flag(S_ISUID, 'U');
  put_flag(S_ISGID, 'G');
  put_flag(S_ISVTX, 'S');

  if (S_ISDIR(mode)) {
    out.push_back('d');
  } else if (S_ISLNK(mode)) {
    out.push_back('l');
  } else {
    out.push_back('-');
  }

  put_flag(S_IRUSR, 'r');
  put_flag(S_IWUSR, 'w');
  put_flag(S_IXUSR, 'x');
  put_flag(S_IRGRP, 'r');
  put_flag(S_IWGRP, 'w');
  put_flag(S_IXGRP, 'x');
  put_flag(S_IROTH, 'r');
  put_flag(S_IWOTH, 'w');
  put_flag(S_IXOTH, 'x');

  return out;
}
#if 0
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::parse(const struct ::stat* defaults) {
size_t offset = 0;
while (offset + sizeof(section_header) <= size()) {
const section_header* sh = as<section_header>(offset);
log_.debug() << "section_header@" << offset << " (" << sh->to_string()
<< ")";
offset += sizeof(section_header);
if (offset + sh->length > size()) {
throw std::runtime_error("truncated metadata");
}
if (sh->compression != compression_type::NONE) {
throw std::runtime_error("unsupported metadata compression type");
}
switch (sh->type) {
case section_type::META_TABLEDATA:
case section_type::META_DIRECTORIES:
// ok, ignore
break;
case section_type::META_CHUNK_INDEX:
chunk_index_ = as<uint32_t>(offset);
break;
case section_type::META_INODE_INDEX:
inode_index_ = as<uint32_t>(offset);
break;
case section_type::META_CONFIG:
cfg_ = as<meta_config>(offset);
break;
default:
throw std::runtime_error("unknown metadata section");
}
offset += sh->length;
}
// TODO: moar checkz
if (!cfg_) {
throw std::runtime_error("no metadata configuration found");
}
struct ::stat stat_defaults;
if (defaults) {
stat_defaults = *defaults;
} else {
metadata::get_stat_defaults(&stat_defaults);
}
chunk_index_ -= cfg_->chunk_index_offset;
inode_index_ -= cfg_->inode_index_offset;
root_ = as<dir_entry>(inode_index_[0]);
dir_reader_ = dir_reader::create(cfg_->de_type, stat_defaults,
reinterpret_cast<const char*>(data_.data()),
inode_offset_);
}
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::walk(
const dir_entry* de,
std::function<void(const dir_entry*)> const& func) const {
func(de);
if (S_ISDIR(de->mode)) {
auto dir = getdir(de);
for (size_t i = 0; i < dir->count; ++i) {
walk(dir_reader_->readdir(dir, i), func);
}
}
}
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::walk(
std::function<void(const dir_entry*)> const& func) const {
walk(root_, func);
}
template <typename LoggerPolicy>
const dir_entry* metadata_<LoggerPolicy>::find(const char* path) const {
while (*path and *path == '/') {
++path;
}
const dir_entry* de = root_;
while (*path) {
const char* next = ::strchr(path, '/');
size_t clen = next ? next - path : ::strlen(path);
de = dir_reader_->find(getdir(de), path, clen);
if (!de) {
break;
}
path = next ? next + 1 : path + clen;
}
return de;
}
template <typename LoggerPolicy>
const dir_entry* metadata_<LoggerPolicy>::find(int inode) const {
return get_entry(inode);
}
template <typename LoggerPolicy>
const dir_entry*
metadata_<LoggerPolicy>::find(int inode, const char* name) const {
auto de = get_entry(inode);
if (de) {
de = dir_reader_->find(getdir(de), name, ::strlen(name));
}
return de;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::getattr(const dir_entry* de,
struct ::stat* stbuf) const {
::memset(stbuf, 0, sizeof(*stbuf));
dir_reader_->getattr(de, stbuf, filesize(de));
return 0;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::access(const dir_entry* de, int mode, uid_t uid,
gid_t gid) const {
return dir_reader_->access(de, mode, uid, gid);
}
template <typename LoggerPolicy>
const directory* metadata_<LoggerPolicy>::opendir(const dir_entry* de) const {
if (S_ISDIR(de->mode)) {
return getdir(de);
}
return nullptr;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::open(const dir_entry* de) const {
if (S_ISREG(de->mode)) {
return de->inode;
}
return -1;
}
template <typename LoggerPolicy>
const dir_entry*
metadata_<LoggerPolicy>::readdir(const directory* d, size_t offset,
std::string* name) const {
const dir_entry* de;
switch (offset) {
case 0:
de = as<dir_entry>(d->self);
if (name) {
name->assign(".");
}
break;
case 1:
de = as<dir_entry>(d->parent);
if (name) {
name->assign("..");
}
break;
default:
offset -= 2;
if (offset < d->count) {
de = dir_reader_->readdir(d, offset, name);
} else {
return nullptr;
}
break;
}
return de;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::readlink(const dir_entry* de, char* buf,
size_t size) const {
if (S_ISLNK(de->mode)) {
size_t lsize = linksize(de);
::memcpy(buf, linkptr(de), std::min(lsize, size));
if (size > lsize) {
buf[lsize] = '\0';
}
return 0;
}
return -EINVAL;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::readlink(const dir_entry* de,
std::string* buf) const {
if (S_ISLNK(de->mode)) {
size_t lsize = linksize(de);
buf->assign(linkptr(de), lsize);
return 0;
}
return -EINVAL;
}
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
::memset(stbuf, 0, sizeof(*stbuf));
stbuf->f_bsize = 1UL << cfg_->block_size_bits;
stbuf->f_frsize = 1UL;
stbuf->f_blocks = cfg_->orig_fs_size;
stbuf->f_files = cfg_->inode_count;
stbuf->f_flag = ST_RDONLY;
stbuf->f_namemax = PATH_MAX;
return 0;
}
template <typename LoggerPolicy>
const chunk_type*
metadata_<LoggerPolicy>::get_chunks(int inode, size_t& num) const {
inode -= inode_offset_;
if (inode < static_cast<int>(cfg_->chunk_index_offset) ||
inode >= static_cast<int>(cfg_->inode_count)) {
return nullptr;
}
uint32_t off = chunk_index_[inode];
num = (chunk_index_[inode + 1] - off) / sizeof(chunk_type);
return as<chunk_type>(off);
}
void metadata::get_stat_defaults(struct ::stat* defaults) {
::memset(defaults, 0, sizeof(struct ::stat));
defaults->st_uid = ::geteuid();
defaults->st_gid = ::getegid();
time_t t = ::time(nullptr);
defaults->st_atime = t;
defaults->st_mtime = t;
defaults->st_ctime = t;
}
#endif
// Constructs the public metadata_v2 object by creating its implementation
// through make_unique_logging_object (parameterized on metadata_v2_ and
// logger_policies). `lgr` and `defaults` are passed through unchanged and
// `data` is moved into the implementation.
metadata_v2::metadata_v2(logger& lgr, std::vector<uint8_t>&& data,
const struct ::stat* defaults)
: impl_(make_unique_logging_object<metadata_v2::impl, metadata_v2_,
logger_policies>(lgr, std::move(data),
defaults)) {}
} // namespace dwarfs

View File

@ -58,8 +58,44 @@
#include "dwarfs/script.h"
#include "dwarfs/util.h"
#include "dwarfs/gen-cpp2/metadata_layouts.h"
#include "dwarfs/gen-cpp2/metadata_types.h"
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
#include <thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h>
namespace dwarfs {
namespace {
// Serializes `x` in thrift "frozen" format into a freshly allocated byte
// buffer. The buffer starts with the serialized root layout (schema) and is
// followed directly by the frozen content.
template <class T>
std::vector<uint8_t> freeze_to_buffer(const T& x) {
  using namespace ::apache::thrift::frozen;

  Layout<T> layout;
  size_t content_size = LayoutRoot::layout(x, layout);

  std::string schema;
  serializeRootLayout(layout, schema);

  size_t schema_size = schema.size();
  auto schema_begin = reinterpret_cast<uint8_t const*>(schema.data());
  std::vector<uint8_t> buffer(schema_begin, schema_begin + schema_size);

  // Reserve zero-initialized room for the content behind the schema.
  buffer.resize(schema_size + content_size, 0);

  // Use data() + offset instead of &buffer[offset]: if content_size is 0 the
  // offset equals buffer.size(), and operator[] must not be used with an
  // index equal to size(), whereas a one-past-the-end pointer is fine.
  folly::MutableByteRange content_range(buffer.data() + schema_size,
                                        content_size);

  ByteRangeFreezer::freeze(layout, x, content_range);

  // After freezing, content_range is expected to cover only the part of the
  // reserved space that was not written (presumably freeze() advances the
  // range as it writes -- confirm against FrozenUtil.h); trim that tail.
  buffer.resize(buffer.size() - content_range.size());

  return buffer;
}
} // namespace
template <typename LoggerPolicy>
class scanner_ : public scanner::impl {
public:
@ -225,16 +261,18 @@ class set_inode_visitor : public entry_visitor {
uint32_t inode_no_ = 0;
};
class save_links_visitor : public entry_visitor {
class names_and_links_visitor : public entry_visitor {
public:
save_links_visitor(metadata_writer& mw)
: mw_(mw) {}
names_and_links_visitor(metadata_writer& mw, global_entry_data& data)
: mw_(mw)
, data_(data) {}
void visit(file*) override {
// nothing
}
void visit(file* p) override { data_.add_name(p->name()); }
void visit(link* p) override {
data_.add_name(p->name());
data_.add_link(p->linkname());
const auto& name = p->linkname();
auto r = offset_.emplace(name, mw_.offset());
if (r.second) {
@ -245,19 +283,26 @@ class save_links_visitor : public entry_visitor {
p->set_offset(r.first->second);
}
void visit(dir*) override {
// nothing
void visit(dir* p) override {
if (p->has_parent()) {
data_.add_name(p->name());
}
}
private:
metadata_writer& mw_;
global_entry_data& data_;
std::unordered_map<std::string_view, size_t, folly::Hash> offset_;
};
class save_directories_visitor : public entry_visitor {
public:
save_directories_visitor(metadata_writer& mw, std::vector<uint32_t>& index)
save_directories_visitor(metadata_writer& mw, thrift::metadata::metadata& mv2,
global_entry_data const& ge_data,
std::vector<uint32_t>& index)
: mw_(mw)
, mv2_(mv2)
, ge_data_(ge_data)
, cb_([&](const entry* e, size_t offset) {
index.at(e->inode_num()) = folly::to<uint32_t>(offset);
}) {}
@ -271,17 +316,23 @@ class save_directories_visitor : public entry_visitor {
}
void visit(dir* p) override {
mv2_.dir_link_index.at(p->inode_num()) = mv2_.directories.size();
p->pack(mv2_, ge_data_);
p->set_offset(mw_.offset());
p->pack(mw_.buffer(p->packed_size()), cb_);
if (!p->has_parent()) {
cb_(p, mw_.offset());
p->pack_entry(mw_.buffer(p->packed_entry_size()));
p->pack_entry(mv2_, ge_data_);
}
}
private:
metadata_writer& mw_;
thrift::metadata::metadata& mv2_;
global_entry_data const& ge_data_;
std::function<void(const entry* e, size_t offset)> cb_;
};
@ -382,9 +433,8 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
}
// now scan all files
// TODO: automatically adjust # of worker threads based on load
root->walk([&](entry* ep) {
wg_.add_job([=, this, &prog] {
wg_.add_job([=, &prog] {
if (ep->type() == entry::E_FILE) {
prog.current.store(ep);
ep->scan(*os_, prog);
@ -480,13 +530,18 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "building metadata...";
std::vector<uint8_t> metadata_vec;
metadata_writer mw(lgr_, metadata_vec);
global_entry_data ge_data;
thrift::metadata::metadata mv2;
mv2.dir_link_index.resize(siv.inode_no());
wg_.add_job([&] {
mw.start_section(section_type::META_TABLEDATA);
log_.info() << "saving links...";
save_links_visitor slv(mw);
root->accept(slv);
names_and_links_visitor nlv(mw, ge_data);
root->accept(nlv);
ge_data.index();
log_.debug() << "link data size = " << mw.section_data_size();
@ -497,6 +552,11 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "updating name offsets...";
root->walk([&](entry* ep) {
ep->update(ge_data);
if (auto lp = dynamic_cast<link*>(ep)) {
mv2.dir_link_index.at(ep->inode_num()) =
ge_data.get_link_index(lp->linkname());
}
if (ep->has_parent()) {
auto i = name_offset.find(ep->name());
if (i == name_offset.end()) {
@ -536,23 +596,31 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.debug() << "saved by segmenting: "
<< size_with_unit(prog.saved_by_segmentation);
// mv2.string_table = std::string(
// reinterpret_cast<char const*>(mw.section_data()),
// mw.section_data_size());
// TODO: not sure that's actually needed
root->set_name(std::string());
log_.info() << "saving chunks...";
std::vector<uint32_t> index;
index.resize(im->count() + 1);
mv2.chunk_index.resize(im->count() + 1);
// TODO: we should be able to start this once all blocks have been
// submitted for compression
mw.align(im->chunk_size());
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
index.at(ino->num() - siv.inode_no()) = folly::to<uint32_t>(mw.offset());
mv2.chunk_index.at(ino->num() - siv.inode_no()) = mv2.chunks.size();
mw.write(ino->chunks());
ino->append_chunks(mv2.chunks);
});
// insert dummy inode to help determine number of chunks per inode
index.at(im->count()) = folly::to<uint32_t>(mw.offset());
mv2.chunk_index.at(im->count()) = mv2.chunks.size();
mw.finish_section();
@ -568,8 +636,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "saving directories...";
index.resize(siv.inode_no() + im->count());
mv2.inode_index.resize(siv.inode_no() + im->count());
mw.start_section(section_type::META_DIRECTORIES);
save_directories_visitor sdv(mw, index);
save_directories_visitor sdv(mw, mv2, ge_data, index);
root->accept(sdv);
mw.finish_section();
@ -592,8 +661,37 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
mw.finish_section();
fsw.write_metadata(std::move(metadata_vec));
mv2.uids = ge_data.get_uids();
mv2.gids = ge_data.get_gids();
mv2.modes = ge_data.get_modes();
mv2.names = ge_data.get_names();
mv2.links = ge_data.get_links();
mv2.timestamp_base = ge_data.timestamp_base;
mv2.chunk_index_offset = siv.inode_no();
mv2.total_fs_size = prog.original_size;
fsw.write_metadata_v2(freeze_to_buffer(mv2));
fsw.flush();
// ::apache::thrift::frozen::freezeToFile(mv2, folly::File("metadata.frozen",
// O_RDWR | O_CREAT));
// auto mapping = folly::MemoryMapping("metadata.frozen");
// ::apache::thrift::frozen::Layout<thrift::metadata::metadata> layout;
// ::apache::thrift::frozen::schema::Schema schema;
// auto range = mapping.range();
// apache::thrift::CompactSerializer::deserialize(range, schema);
// log_.info() << ::apache::thrift::debugString(schema);
// auto mapped =
// ::apache::thrift::frozen::mapFrozen<thrift::metadata::metadata>(std::move(mapping));
// log_.info() << ::apache::thrift::debugString(mapped.thaw());
log_.info() << "compressed " << size_with_unit(prog.original_size) << " to "
<< size_with_unit(prog.compressed_size) << " (ratio="
<< static_cast<double>(prog.compressed_size) / prog.original_size

View File

@ -57,7 +57,7 @@ class basic_worker_group : public worker_group::impl, private Policy {
}
for (size_t i = 0; i < num_workers; ++i) {
workers_.emplace_back([=, this] {
workers_.emplace_back([=] {
folly::setThreadName(folly::to<std::string>(group_name, i + 1));
do_work();
});

View File

@ -29,7 +29,7 @@
int main(int argc, char** argv) {
if (argc == 2 || argc == 3) {
try {
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::INFO);
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::DEBUG);
dwarfs::filesystem fs(lgr, std::make_shared<dwarfs::mmap>(argv[1]),
dwarfs::block_cache_options());
@ -48,7 +48,8 @@ int main(int argc, char** argv) {
dwarfs::filesystem::identify(
lgr, std::make_shared<dwarfs::mmap>(argv[1]), std::cout);
// TODO:
// fs.dump(std::cout);
fs.dump(std::cout);
fs.dump_v2(std::cout);
}
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;

View File

@ -217,7 +217,7 @@ int mkdwarfs(int argc, char** argv) {
block_manager::config cfg;
std::string path, output, window_sizes, memory_limit, script_path,
compression, log_level;
compression, metadata_compression, log_level;
size_t num_workers, max_scanner_workers;
bool no_time = false, no_owner = false, recompress = false,
no_progress = false;
@ -255,6 +255,9 @@ int mkdwarfs(int argc, char** argv) {
("compression,C",
po::value<std::string>(&compression),
"block compression algorithm")
("metadata-compression",
po::value<std::string>(&metadata_compression),
"metadata compression algorithm (default: same as block compression)")
("recompress",
po::value<bool>(&recompress)->zero_tokens(),
"recompress an existing filesystem")
@ -363,6 +366,10 @@ int mkdwarfs(int argc, char** argv) {
compression = defaults.compression;
}
if (!vm.count("metadata-compression")) {
metadata_compression = compression;
}
if (!vm.count("blockhash-window-sizes")) {
window_sizes = defaults.window_sizes;
}
@ -406,8 +413,9 @@ int mkdwarfs(int argc, char** argv) {
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });
block_compressor bc(compression);
block_compressor metadata_bc(metadata_compression);
std::ofstream ofs(output);
filesystem_writer fsw(ofs, lgr, wg_writer, prog, bc, mem_limit);
filesystem_writer fsw(ofs, lgr, wg_writer, prog, bc, metadata_bc, mem_limit);
if (recompress) {
auto ti = log.timed_info();

67
thrift/metadata.thrift Normal file
View File

@ -0,0 +1,67 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
namespace cpp2 dwarfs.thrift.metadata
/**
 * Thrift only offers signed integer types. These typedefs use the
 * `cpp2.type` annotation so that the generated C++ code uses the unsigned
 * type of the same width instead.
 */
typedef i16 (cpp2.type = "uint16_t") UInt16
typedef i32 (cpp2.type = "uint32_t") UInt32
typedef i64 (cpp2.type = "uint64_t") UInt64
/**
 * One chunk of file data. The size of a regular file is the sum of the
 * `size` fields of all of its chunks.
 */
struct chunk {
// presumably the number of the block holding the data -- TODO confirm
1: required UInt32 block,
// presumably the offset of the data within that block -- TODO confirm
2: required UInt32 offset,
// size of this chunk
3: required UInt32 size,
}
/**
 * A directory. Its entries are stored contiguously in `metadata.entries`,
 * starting at `first_entry` and spanning `entry_count` elements.
 */
struct directory {
// presumably the inode number of the directory itself -- TODO confirm
1: required UInt32 self_inode,
// presumably the inode number of the parent directory -- TODO confirm
2: required UInt32 parent_inode,
// index of the first entry of this directory in `metadata.entries`
3: required UInt32 first_entry,
// number of entries in this directory
4: required UInt32 entry_count,
}
/**
 * A single file system entry (file, directory or symlink).
 */
struct entry {
// index into `metadata.names`
1: required UInt32 name_index,
// index into `metadata.modes` (not the mode bits themselves)
2: required UInt16 mode,
// inode number; used to look up chunks (regular files) or the
// directory / link target via `metadata.dir_link_index`
3: required UInt32 inode,
// presumably an index into `metadata.uids` -- TODO confirm
4: required UInt16 owner,
// presumably an index into `metadata.gids` -- TODO confirm
5: required UInt16 group,
// NOTE(review): timestamps are presumably stored relative to
// `metadata.timestamp_base` -- confirm against the writer
6: required UInt64 atime,
7: required UInt64 mtime,
8: required UInt64 ctime,
}
/**
 * Top-level file system metadata.
 */
struct metadata {
  // Flat list of the chunks of all regular files; the per-file ranges are
  // given by `chunk_index`.
  1: required list<chunk> chunks,

  // For a regular file with inode number `n`, the file's chunks are
  // `chunks[chunk_index[i] .. chunk_index[i + 1])` with
  // `i = n - chunk_index_offset`. The list carries one extra trailing
  // element so that the last file's range is delimited as well.
  2: required list<UInt32> chunk_index,

  // All directories; addressed through `dir_link_index`.
  3: required list<directory> directories,

  // All entries; the entries of one directory are stored contiguously.
  4: required list<entry> entries,

  // `inode_index[0]` is the index of the root entry in `entries`.
  // Presumably maps every inode number to its entry -- TODO confirm.
  5: required list<UInt32> inode_index,

  // Indexed by inode number: for a directory, the index into `directories`;
  // for a symlink, the index into `links`.
  6: required list<UInt32> dir_link_index,

  // Tables of distinct user and group ids (presumably referenced by
  // `entry.owner` / `entry.group` -- TODO confirm).
  7: required list<UInt16> uids,
  8: required list<UInt16> gids,

  // Table of mode values; referenced by `entry.mode`.
  9: required list<UInt16> modes,

  // Table of entry names; referenced by `entry.name_index`.
  10: required list<string> names,

  // Table of symlink targets; referenced through `dir_link_index`.
  11: required list<string> links,

  // NOTE(review): presumably the base value that entry timestamps are
  // relative to -- confirm against the writer.
  12: required UInt64 timestamp_base,

  // Value to subtract from an inode number before indexing `chunk_index`.
  13: required UInt32 chunk_index_offset,

  // Total size of the original (uncompressed) file system data.
  14: required UInt64 total_fs_size,
}