mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 03:49:44 -04:00
First take at metadata v2
This commit is contained in:
parent
5ac0fe1399
commit
f373144b73
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +1,6 @@
|
||||
[submodule "folly"]
|
||||
path = folly
|
||||
url = https://github.com/facebook/folly
|
||||
[submodule "fbthrift"]
|
||||
path = fbthrift
|
||||
url = https://github.com/facebook/fbthrift/
|
||||
|
101
CMakeLists.txt
101
CMakeLists.txt
@ -49,7 +49,12 @@ pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3)
|
||||
pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4)
|
||||
pkg_check_modules(LIBZSTD IMPORTED_TARGET libzstd>=1.3.8)
|
||||
|
||||
# Cache switch with the help text "only build thrift compiler"; presumably
# consumed by the vendored fbthrift build below (confirm there). As a plain
# cache entry, a value already stored in the cache is left untouched.
set(compiler_only ON CACHE BOOL "only build thrift compiler")

# Vendored submodules. EXCLUDE_FROM_ALL keeps their targets out of the default
# build unless one of this project's targets depends on them.
add_subdirectory(folly EXCLUDE_FROM_ALL)
add_subdirectory(fbthrift EXCLUDE_FROM_ALL)
|
||||
|
||||
if(WITH_TESTS)
|
||||
# Download and unpack googletest at configure time
|
||||
@ -99,6 +104,7 @@ list(
|
||||
src/dwarfs/inode_reader.cpp
|
||||
src/dwarfs/logger.cpp
|
||||
src/dwarfs/metadata.cpp
|
||||
src/dwarfs/metadata_v2.cpp
|
||||
src/dwarfs/metadata_writer.cpp
|
||||
src/dwarfs/mmap.cpp
|
||||
src/dwarfs/options.cpp
|
||||
@ -132,11 +138,92 @@ if(WITH_TESTS)
|
||||
gtest_discover_tests(dwarfs_test)
|
||||
endif()
|
||||
|
||||
# Generate the C++ sources for the dwarfs metadata schema and for fbthrift's
# frozen schema.
#
# Both .thrift files are first copied into a directory layout below
# ${CMAKE_CURRENT_BINARY_DIR}/thrift, then the thrift1 compiler is run inside
# each directory so that the gen-cpp2 output lands next to the copied input.
#
# All file operations go through `cmake -E` so the rule does not rely on a
# POSIX shell (mkdir -p, cp, cd ... &&), and every path is absolute so the rule
# does not depend on the directory the build tool happens to run it from.
add_custom_command(
  OUTPUT
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_constants.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_constants.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_for_each_field.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_metadata.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_metadata.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.tcc
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_custom_protocol.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_union.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visitation.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_data.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_data.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.tcc
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_constants.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_constants.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_metadata.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_metadata.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_visitation.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_for_each_field.h
    ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_visit_union.h
  # Stage frozen.thrift from the fbthrift submodule.
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/thrift/frozen.thrift
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/frozen.thrift
  # Stage the project's own metadata schema.
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/thrift/metadata.thrift
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/metadata.thrift
  # Run the compiler from inside each staging directory.
  COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
          ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 --gen mstch_cpp2:frozen2
          metadata.thrift
  COMMAND ${CMAKE_COMMAND} -E chdir
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift
          ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 --gen mstch_cpp2
          frozen.thrift
  # frozen.thrift is an input too: without it here, updating the fbthrift
  # submodule would not regenerate the frozen_* sources.
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1
          ${CMAKE_CURRENT_SOURCE_DIR}/thrift/metadata.thrift
          ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/thrift/frozen.thrift
  COMMENT "Generating thrift sources for metadata.thrift and frozen.thrift"
  VERBATIM)
|
||||
|
||||
# Include roots shared by the thrift helper libraries and the dwarfs targets:
# the folly and generated-thrift trees in the build directory, the folly and
# fbthrift submodule sources, and the build directory itself.
list(APPEND INCLUDE_DIRS
     ${CMAKE_CURRENT_BINARY_DIR}/folly
     ${CMAKE_CURRENT_BINARY_DIR}/thrift
     ${CMAKE_CURRENT_SOURCE_DIR}/folly
     ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift
     ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# thrift_light: a hand-picked set of fbthrift sources (protocols, varint
# utilities and the frozen layout code) plus the generated frozen_types.cpp,
# compiled directly instead of going through fbthrift's own library targets.
add_library(
  thrift_light
  # protocol implementations
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/CompactProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/BinaryProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/DebugProtocol.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/JSONProtocolCommon.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp/protocol/TProtocolException.cpp
  # utilities and generated-code support
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp/util/VarintUtils.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/gen/module_types_cpp.cpp
  # frozen layout support
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/Frozen.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/FrozenUtil.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/frozen/schema/MemorySchema.cpp
  # produced by the thrift code generation rule
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/lib/thrift/gen-cpp2/frozen_types.cpp)

set_target_properties(thrift_light PROPERTIES CXX_STANDARD 17)

target_include_directories(thrift_light PRIVATE ${INCLUDE_DIRS})
|
||||
|
||||
# metadata_thrift: the generated C++ sources for the dwarfs metadata schema.
add_library(
  metadata_thrift
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_layouts.cpp
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp)

set_property(TARGET metadata_thrift PROPERTY CXX_STANDARD 17)

target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS})

# thrift_light and metadata_thrift both consume outputs of the same thrift code
# generation rule. If the two targets are independent, a parallel build may run
# that rule twice at the same time. Ordering metadata_thrift after thrift_light
# makes sure the generated files already exist and are up to date when this
# target starts building.
add_dependencies(metadata_thrift thrift_light)
|
||||
|
||||
foreach(tgt dwarfs ${BINARY_TARGETS})
|
||||
target_include_directories(
|
||||
${tgt} SYSTEM
|
||||
PRIVATE ${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/folly
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/folly)
|
||||
target_include_directories(${tgt} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}
|
||||
${INCLUDE_DIRS})
|
||||
|
||||
target_include_directories(${tgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
|
||||
@ -150,9 +237,11 @@ foreach(tgt dwarfs ${BINARY_TARGETS})
|
||||
|
||||
target_compile_options(${tgt} PRIVATE -Wall -Wextra -pedantic)
|
||||
|
||||
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD 20)
|
||||
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD 17)
|
||||
set_property(TARGET ${tgt} PROPERTY CXX_STANDARD_REQUIRED ON)
|
||||
set_property(TARGET ${tgt} PROPERTY CXX_EXTENSIONS OFF)
|
||||
|
||||
add_dependencies(${tgt} metadata_thrift)
|
||||
endforeach()
|
||||
|
||||
target_compile_definitions(dwarfs-bin PRIVATE FUSE_USE_VERSION=35
|
||||
@ -162,6 +251,8 @@ foreach(tgt ${BINARY_TARGETS})
|
||||
target_link_libraries(
|
||||
${tgt}
|
||||
dwarfs
|
||||
metadata_thrift
|
||||
thrift_light
|
||||
folly
|
||||
${Boost_LIBRARIES}
|
||||
PkgConfig::LIBLZ4
|
||||
|
1
fbthrift
Submodule
1
fbthrift
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 42536064c10726c50ce07a0ffd0910c17d8781da
|
@ -23,17 +23,95 @@
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "file_interface.h"
|
||||
#include "fstypes.h"
|
||||
#include "dwarfs/file_interface.h"
|
||||
#include "dwarfs/fstypes.h"
|
||||
|
||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
/**
 * Values shared by all entries while building v2 metadata.
 *
 * Distinct uids, gids and modes are mapped to 16-bit indices in order of
 * first registration. Names and link targets are collected first and receive
 * their indices later through index(). Timestamps are stored as offsets from
 * the smallest timestamp registered via add_time().
 */
struct global_entry_data {
  // Register a uid; an unseen value is assigned the next free uid index.
  void add_uid(uint16_t uid) { add(uid, uids, next_uid_index); }

  // Register a gid; an unseen value is assigned the next free gid index.
  void add_gid(uint16_t gid) { add(gid, gids, next_gid_index); }

  // Register a mode; an unseen value is assigned the next free mode index.
  void add_mode(uint16_t mode) { add(mode, modes, next_mode_index); }

  // Insert `val` into `map` with index `next_index`. The counter is only
  // advanced when the value was not present before.
  void add(uint16_t val, std::unordered_map<uint16_t, uint16_t>& map,
           uint16_t& next_index) {
    bool const is_new = map.emplace(val, next_index).second;
    if (!is_new) {
      return;
    }
    ++next_index;
  }

  // Lower the timestamp base if `time` is smaller than the current base.
  void add_time(uint64_t time) {
    timestamp_base = time < timestamp_base ? time : timestamp_base;
  }

  // Collect a name; its index stays 0 until index() has been called.
  void add_name(std::string const& name) { names.try_emplace(name, 0); }

  // Collect a link target; its index stays 0 until index() has been called.
  void add_link(std::string const& link) { links.try_emplace(link, 0); }

  // Assign indices to all collected names and link targets.
  void index() {
    index(names);
    index(links);
  }

  void index(std::unordered_map<std::string, uint32_t>& map);

  // Index lookups. All of them use at() and therefore throw
  // std::out_of_range for a value that was never registered.
  uint16_t get_uid_index(uint16_t uid) const { return uids.at(uid); }

  uint16_t get_gid_index(uint16_t gid) const { return gids.at(gid); }

  uint16_t get_mode_index(uint16_t mode) const { return modes.at(mode); }

  uint32_t get_name_index(std::string const& name) const {
    return names.at(name);
  }

  uint32_t get_link_index(std::string const& link) const {
    return links.at(link);
  }

  // Offset of `time` relative to the smallest registered timestamp.
  uint64_t get_time_offset(uint64_t time) const {
    return time - timestamp_base;
  }

  std::vector<uint16_t> get_uids() const;

  std::vector<uint16_t> get_gids() const;

  std::vector<uint16_t> get_modes() const;

  std::vector<std::string> get_names() const;

  std::vector<std::string> get_links() const;

  // TODO: make private
  template <typename T, typename U>
  std::vector<T> get_vector(std::unordered_map<T, U> const& map) const;

  // value -> index tables
  std::unordered_map<uint16_t, uint16_t> uids;
  std::unordered_map<uint16_t, uint16_t> gids;
  std::unordered_map<uint16_t, uint16_t> modes;
  std::unordered_map<std::string, uint32_t> names;
  std::unordered_map<std::string, uint32_t> links;

  // next index to hand out per table
  uint16_t next_uid_index{0};
  uint16_t next_gid_index{0};
  uint16_t next_mode_index{0};

  // starts at the maximum so the first add_time() call always lowers it
  uint64_t timestamp_base{std::numeric_limits<uint64_t>::max()};
};
|
||||
|
||||
class file;
|
||||
class link;
|
||||
class dir;
|
||||
@ -72,6 +150,9 @@ class entry : public file_interface {
|
||||
void pack(dir_entry& de) const;
|
||||
void pack(dir_entry_ug& de) const;
|
||||
void pack(dir_entry_ug_time& de) const;
|
||||
void
|
||||
pack(thrift::metadata::entry& entry_v2, global_entry_data const& data) const;
|
||||
void update(global_entry_data& data) const;
|
||||
virtual void accept(entry_visitor& v, bool preorder = false) = 0;
|
||||
virtual uint32_t inode_num() const = 0;
|
||||
|
||||
@ -130,8 +211,12 @@ class dir : public entry {
|
||||
pack(uint8_t* buf,
|
||||
std::function<void(const entry* e, size_t offset)> const& offset_cb)
|
||||
const = 0;
|
||||
virtual void pack(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const = 0;
|
||||
virtual size_t packed_entry_size() const = 0;
|
||||
virtual void pack_entry(uint8_t* buf) const = 0;
|
||||
virtual void pack_entry(thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& data) const = 0;
|
||||
uint32_t inode_num() const override { return inode_; }
|
||||
|
||||
protected:
|
||||
|
@ -86,6 +86,8 @@ class filesystem {
|
||||
|
||||
void dump(std::ostream& os) const { impl_->dump(os); }
|
||||
|
||||
// Write a dump of the v2 metadata to `os`; forwards to the implementation.
void dump_v2(std::ostream& os) const {
  impl_->dump_v2(os);
}
|
||||
|
||||
void walk(std::function<void(const dir_entry*)> const& func) {
|
||||
impl_->walk(func);
|
||||
}
|
||||
@ -143,6 +145,7 @@ class filesystem {
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void dump(std::ostream& os) const = 0;
|
||||
virtual void dump_v2(std::ostream& os) const = 0;
|
||||
virtual void
|
||||
walk(std::function<void(const dir_entry*)> const& func) const = 0;
|
||||
virtual const dir_entry* find(const char* path) const = 0;
|
||||
|
@ -54,6 +54,10 @@ class filesystem_writer {
|
||||
progress& prog, const block_compressor& bc,
|
||||
size_t max_queue_size);
|
||||
|
||||
filesystem_writer(std::ostream& os, logger& lgr, worker_group& wg,
|
||||
progress& prog, const block_compressor& bc,
|
||||
const block_compressor& metadata_bc, size_t max_queue_size);
|
||||
|
||||
// section create_block();
|
||||
// section create_metadata();
|
||||
|
||||
@ -67,6 +71,10 @@ class filesystem_writer {
|
||||
impl_->write_metadata(std::move(data));
|
||||
}
|
||||
|
||||
// Hand a serialized v2 metadata buffer to the implementation, which takes
// ownership of it.
void write_metadata_v2(std::vector<uint8_t>&& data) {
  auto& writer = *impl_;
  writer.write_metadata_v2(std::move(data));
}
|
||||
|
||||
void flush() { impl_->flush(); }
|
||||
|
||||
size_t size() const { return impl_->size(); }
|
||||
@ -77,6 +85,7 @@ class filesystem_writer {
|
||||
|
||||
virtual void write_block(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void write_metadata(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void flush() = 0;
|
||||
virtual size_t size() const = 0;
|
||||
};
|
||||
|
@ -106,6 +106,9 @@ enum class section_type : uint16_t {
|
||||
// the block size which is needed for working with the
|
||||
// chunk lists. Also defines inode offsets being used
|
||||
// and the total inode count (for out-of-bounds checks).
|
||||
|
||||
METADATA_V2 = 7,
|
||||
// Frozen metadata.
|
||||
};
|
||||
|
||||
enum class dir_entry_type : uint8_t {
|
||||
|
@ -26,6 +26,10 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace thrift::metadata {
|
||||
struct chunk;
|
||||
}
|
||||
|
||||
class file;
|
||||
class file_interface;
|
||||
|
||||
@ -38,5 +42,7 @@ class inode : public file_interface {
|
||||
virtual const file_interface* any() const = 0; // TODO
|
||||
virtual void add_chunk(size_t block, size_t offset, size_t size) = 0;
|
||||
virtual const std::vector<chunk_type>& chunks() const = 0;
|
||||
virtual void
|
||||
append_chunks(std::vector<thrift::metadata::chunk>& vec) const = 0;
|
||||
};
|
||||
} // namespace dwarfs
|
||||
|
147
include/dwarfs/metadata_v2.h
Normal file
147
include/dwarfs/metadata_v2.h
Normal file
@ -0,0 +1,147 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "fstypes.h"
|
||||
#include "logger.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class metadata_v2 {
|
||||
public:
|
||||
metadata_v2() = default;
|
||||
|
||||
metadata_v2(logger& lgr, std::vector<uint8_t>&& data,
|
||||
const struct ::stat* defaults);
|
||||
|
||||
metadata_v2& operator=(metadata_v2&&) = default;
|
||||
|
||||
void
|
||||
dump(std::ostream& os,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const {
|
||||
impl_->dump(os, icb);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void get_stat_defaults(struct ::stat* defaults);
|
||||
|
||||
size_t size() const { return impl_->size(); }
|
||||
|
||||
bool empty() const { return !impl_ || impl_->empty(); }
|
||||
|
||||
size_t block_size() const { return impl_->block_size(); }
|
||||
|
||||
unsigned block_size_bits() const { return impl_->block_size_bits(); }
|
||||
|
||||
void walk(std::function<void(const dir_entry*)> const& func) const {
|
||||
impl_->walk(func);
|
||||
}
|
||||
|
||||
const dir_entry* find(const char* path) const { return impl_->find(path); }
|
||||
|
||||
const dir_entry* find(int inode) const { return impl_->find(inode); }
|
||||
|
||||
const dir_entry* find(int inode, const char* name) const {
|
||||
return impl_->find(inode, name);
|
||||
}
|
||||
|
||||
int getattr(const dir_entry* de, struct ::stat* stbuf) const {
|
||||
return impl_->getattr(de, stbuf);
|
||||
}
|
||||
|
||||
int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const {
|
||||
return impl_->access(de, mode, uid, gid);
|
||||
}
|
||||
|
||||
const directory* opendir(const dir_entry* de) const {
|
||||
return impl_->opendir(de);
|
||||
}
|
||||
|
||||
const dir_entry*
|
||||
readdir(const directory* d, size_t offset, std::string* name) const {
|
||||
return impl_->readdir(d, offset, name);
|
||||
}
|
||||
|
||||
size_t dirsize(const directory* d) const { return impl_->dirsize(d); }
|
||||
|
||||
int readlink(const dir_entry* de, char* buf, size_t size) const {
|
||||
return impl_->readlink(de, buf, size);
|
||||
}
|
||||
|
||||
int readlink(const dir_entry* de, std::string* buf) const {
|
||||
return impl_->readlink(de, buf);
|
||||
}
|
||||
|
||||
int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); }
|
||||
|
||||
int open(const dir_entry* de) const { return impl_->open(de); }
|
||||
|
||||
const chunk_type* get_chunks(int inode, size_t& num) const {
|
||||
return impl_->get_chunks(inode, num);
|
||||
}
|
||||
#endif
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void dump(
|
||||
std::ostream& os,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const = 0;
|
||||
#if 0
|
||||
virtual size_t size() const = 0;
|
||||
virtual bool empty() const = 0;
|
||||
virtual size_t block_size() const = 0;
|
||||
virtual unsigned block_size_bits() const = 0;
|
||||
virtual void
|
||||
walk(std::function<void(const dir_entry*)> const& func) const = 0;
|
||||
virtual const dir_entry* find(const char* path) const = 0;
|
||||
virtual const dir_entry* find(int inode) const = 0;
|
||||
virtual const dir_entry* find(int inode, const char* name) const = 0;
|
||||
virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0;
|
||||
virtual int
|
||||
access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0;
|
||||
virtual const directory* opendir(const dir_entry* de) const = 0;
|
||||
virtual const dir_entry*
|
||||
readdir(const directory* d, size_t offset, std::string* name) const = 0;
|
||||
virtual size_t dirsize(const directory* d) const = 0;
|
||||
virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0;
|
||||
virtual int readlink(const dir_entry* de, std::string* buf) const = 0;
|
||||
virtual int statvfs(struct ::statvfs* stbuf) const = 0;
|
||||
virtual int open(const dir_entry* de) const = 0;
|
||||
virtual const chunk_type* get_chunks(int inode, size_t& num) const = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
private:
|
||||
std::unique_ptr<impl> impl_;
|
||||
};
|
||||
} // namespace dwarfs
|
@ -28,6 +28,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include <folly/Conv.h>
|
||||
#include <folly/gen/Base.h>
|
||||
|
||||
#include <openssl/sha.h>
|
||||
|
||||
@ -39,6 +40,41 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
template <typename T, typename U>
|
||||
std::vector<T>
|
||||
global_entry_data::get_vector(std::unordered_map<T, U> const& map) const {
|
||||
using namespace folly::gen;
|
||||
std::vector<std::pair<T, U>> pairs(map.begin(), map.end());
|
||||
return from(pairs) | orderBy([](auto const& p) { return p.second; }) |
|
||||
get<0>() | as<std::vector>();
|
||||
}
|
||||
|
||||
std::vector<uint16_t> global_entry_data::get_uids() const {
|
||||
return get_vector(uids);
|
||||
}
|
||||
|
||||
std::vector<uint16_t> global_entry_data::get_gids() const {
|
||||
return get_vector(gids);
|
||||
}
|
||||
|
||||
std::vector<uint16_t> global_entry_data::get_modes() const {
|
||||
return get_vector(modes);
|
||||
}
|
||||
|
||||
std::vector<std::string> global_entry_data::get_names() const {
|
||||
return get_vector(names);
|
||||
}
|
||||
|
||||
std::vector<std::string> global_entry_data::get_links() const {
|
||||
return get_vector(links);
|
||||
}
|
||||
|
||||
void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
|
||||
using namespace folly::gen;
|
||||
uint32_t ix = 0;
|
||||
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
|
||||
}
|
||||
|
||||
template <typename DirEntryType>
|
||||
class dir_ : public dir {
|
||||
public:
|
||||
@ -51,6 +87,13 @@ class dir_ : public dir {
|
||||
entry::pack(*de);
|
||||
}
|
||||
|
||||
// Append this directory's own entry record to `mv2` and note its position in
// the inode index. inode_index.at() throws if the index has no slot for this
// inode.
void pack_entry(thrift::metadata::metadata& mv2,
                global_entry_data const& data) const override {
  auto const position = mv2.entries.size();
  mv2.inode_index.at(inode_num()) = position;
  mv2.entries.emplace_back();
  auto& record = mv2.entries.back();
  entry::pack(record, data);
}
|
||||
|
||||
size_t packed_size() const override {
|
||||
return offsetof(directory, u) + sizeof(DirEntryType) * entries_.size();
|
||||
}
|
||||
@ -74,6 +117,23 @@ class dir_ : public dir {
|
||||
++de;
|
||||
}
|
||||
}
|
||||
|
||||
// Append this directory's record and the entry records of its children to
// `mv2`.
//
// The directory record stores this directory's inode, the parent's inode (0
// when there is no parent) and the range [first_entry, first_entry +
// entry_count) that the children occupy in mv2.entries. Each child's position
// is also written to mv2.inode_index; at() throws if the index has no slot for
// that inode.
void pack(thrift::metadata::metadata& mv2,
          global_entry_data const& data) const override {
  thrift::metadata::directory dir;
  dir.self_inode = inode_num();
  // inode_num() is virtual, so it can be called on the parent directly. The
  // former dynamic_pointer_cast<dir_> was dereferenced unchecked and would
  // yield a null pointer for a parent of any other concrete type.
  dir.parent_inode = has_parent() ? parent()->inode_num() : 0;
  dir.first_entry = mv2.entries.size();
  dir.entry_count = entries_.size();
  mv2.directories.push_back(dir);
  for (entry_ptr const& e : entries_) {
    mv2.inode_index.at(e->inode_num()) = mv2.entries.size();
    mv2.entries.emplace_back();
    e->pack(mv2.entries.back(), data);
  }
}
|
||||
};
|
||||
|
||||
entry::entry(const std::string& name, std::shared_ptr<entry> parent,
|
||||
@ -156,6 +216,27 @@ void entry::pack(dir_entry_ug_time& de) const {
|
||||
pack(de.ug);
|
||||
}
|
||||
|
||||
// Register this entry's owner, group, mode and timestamps in `data`.
void entry::update(global_entry_data& data) const {
  data.add_uid(stat_.st_uid);
  data.add_gid(stat_.st_gid);

  // Only the low 16 bits of the mode are registered.
  auto const mode_bits = stat_.st_mode & 0xFFFF;
  data.add_mode(mode_bits);

  // All three timestamps feed the same timestamp base.
  for (auto const stamp : {stat_.st_atime, stat_.st_mtime, stat_.st_ctime}) {
    data.add_time(stamp);
  }
}
|
||||
|
||||
// Fill the v2 entry record from this entry, translating values into the
// indices and time offsets held by `data`. The index lookups throw for values
// that were never registered in `data`.
void entry::pack(thrift::metadata::entry& entry_v2,
                 global_entry_data const& data) const {
  // An entry without a parent uses name index 0 instead of a lookup.
  if (has_parent()) {
    entry_v2.name_index = data.get_name_index(name_);
  } else {
    entry_v2.name_index = 0;
  }

  auto const mode_bits = stat_.st_mode & 0xFFFF;
  entry_v2.mode = data.get_mode_index(mode_bits);
  entry_v2.owner = data.get_uid_index(stat_.st_uid);
  entry_v2.group = data.get_gid_index(stat_.st_gid);

  // Timestamps are stored relative to the smallest registered timestamp.
  entry_v2.atime = data.get_time_offset(stat_.st_atime);
  entry_v2.mtime = data.get_time_offset(stat_.st_mtime);
  entry_v2.ctime = data.get_time_offset(stat_.st_ctime);

  entry_v2.inode = inode_num();
}
|
||||
|
||||
entry::type_t file::type() const { return E_FILE; }
|
||||
|
||||
std::string_view file::hash() const {
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "dwarfs/fstypes.h"
|
||||
#include "dwarfs/inode_reader.h"
|
||||
#include "dwarfs/metadata.h"
|
||||
#include "dwarfs/metadata_v2.h"
|
||||
#include "dwarfs/progress.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -102,6 +103,7 @@ class filesystem_ : public filesystem::impl {
|
||||
const struct ::stat* stat_defaults, int inode_offset);
|
||||
|
||||
void dump(std::ostream& os) const override;
|
||||
void dump_v2(std::ostream& os) const override;
|
||||
void walk(std::function<void(const dir_entry*)> const& func) const override;
|
||||
const dir_entry* find(const char* path) const override;
|
||||
const dir_entry* find(int inode) const override;
|
||||
@ -126,6 +128,7 @@ class filesystem_ : public filesystem::impl {
|
||||
log_proxy<LoggerPolicy> log_;
|
||||
std::shared_ptr<mmif> mm_;
|
||||
metadata meta_;
|
||||
metadata_v2 meta_v2_;
|
||||
inode_reader ir_;
|
||||
};
|
||||
|
||||
@ -156,6 +159,15 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
stat_defaults, inode_offset);
|
||||
break;
|
||||
|
||||
case section_type::METADATA_V2:
|
||||
// TODO: handle in-place uncompressed metadata
|
||||
meta_v2_ =
|
||||
metadata_v2(lgr,
|
||||
block_decompressor::decompress(
|
||||
sh.compression, mm_->as<uint8_t>(start), sh.length),
|
||||
stat_defaults);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::runtime_error("unknown section");
|
||||
}
|
||||
@ -184,6 +196,17 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os) const {
|
||||
});
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_<LoggerPolicy>::dump_v2(std::ostream& os) const {
|
||||
meta_v2_.dump(os, [&](const std::string& indent, uint32_t inode) {
|
||||
size_t num = 0;
|
||||
const chunk_type* chunk = meta_.get_chunks(inode, num); // TODO
|
||||
|
||||
os << indent << num << " chunks in inode " << inode << "\n";
|
||||
ir_.dump(os, indent + " ", chunk, num);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_<LoggerPolicy>::walk(
|
||||
std::function<void(const dir_entry*)> const& func) const {
|
||||
@ -325,6 +348,10 @@ void filesystem::rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
|
||||
writer.write_metadata(std::move(meta_raw));
|
||||
break;
|
||||
|
||||
case section_type::METADATA_V2:
|
||||
// TODO...
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::runtime_error("unknown section");
|
||||
}
|
||||
|
@ -86,20 +86,22 @@ class fsblock {
|
||||
};
|
||||
|
||||
public:
|
||||
fsblock(section_type type, std::vector<uint8_t>&& data)
|
||||
fsblock(section_type type, const block_compressor& bc,
|
||||
std::vector<uint8_t>&& data)
|
||||
: type_(type)
|
||||
, bc_(bc)
|
||||
, uncompressed_size_(data.size())
|
||||
, state_(std::make_shared<state>(std::move(data))) {}
|
||||
|
||||
template <typename LogProxy>
|
||||
void compress(worker_group& wg, const block_compressor& bc, LogProxy& lp) {
|
||||
void compress(worker_group& wg, LogProxy& lp) {
|
||||
lp.trace() << "block queued for compression";
|
||||
|
||||
std::shared_ptr<state> s = state_;
|
||||
|
||||
wg.add_job([&, bc, s] {
|
||||
wg.add_job([&, s] {
|
||||
lp.trace() << "block compression started";
|
||||
s->compress(bc, lp);
|
||||
s->compress(bc_, lp);
|
||||
});
|
||||
}
|
||||
|
||||
@ -107,6 +109,8 @@ class fsblock {
|
||||
|
||||
section_type type() const { return type_; }
|
||||
|
||||
compression_type compression() const { return bc_.type(); }
|
||||
|
||||
const std::vector<uint8_t>& data() const {
|
||||
return state_->data();
|
||||
;
|
||||
@ -118,6 +122,7 @@ class fsblock {
|
||||
|
||||
private:
|
||||
const section_type type_;
|
||||
block_compressor const& bc_;
|
||||
const size_t uncompressed_size_;
|
||||
std::shared_ptr<state> state_;
|
||||
};
|
||||
@ -127,17 +132,21 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
public:
|
||||
filesystem_writer_(logger& lgr, std::ostream& os, worker_group& wg,
|
||||
progress& prog, const block_compressor& bc,
|
||||
const block_compressor& metadata_bc,
|
||||
size_t max_queue_size);
|
||||
~filesystem_writer_() noexcept;
|
||||
|
||||
void write_block(std::vector<uint8_t>&& data) override;
|
||||
void write_metadata(std::vector<uint8_t>&& data) override;
|
||||
void write_metadata_v2(std::vector<uint8_t>&& data) override;
|
||||
void flush() override;
|
||||
size_t size() const override { return os_.tellp(); }
|
||||
|
||||
private:
|
||||
void write_section(section_type type, std::vector<uint8_t>&& data);
|
||||
void write(section_type type, const std::vector<uint8_t>& data);
|
||||
void write_section(section_type type, std::vector<uint8_t>&& data,
|
||||
block_compressor const& bc);
|
||||
void write(section_type type, compression_type compression,
|
||||
const std::vector<uint8_t>& data);
|
||||
void write(const char* data, size_t size);
|
||||
template <typename T>
|
||||
void write(const T& obj);
|
||||
@ -150,6 +159,7 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
worker_group& wg_;
|
||||
progress& prog_;
|
||||
const block_compressor& bc_;
|
||||
const block_compressor& metadata_bc_;
|
||||
const size_t max_queue_size_;
|
||||
log_proxy<LoggerPolicy> log_;
|
||||
std::deque<std::unique_ptr<fsblock>> queue_;
|
||||
@ -162,11 +172,13 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
template <typename LoggerPolicy>
|
||||
filesystem_writer_<LoggerPolicy>::filesystem_writer_(
|
||||
logger& lgr, std::ostream& os, worker_group& wg, progress& prog,
|
||||
const block_compressor& bc, size_t max_queue_size)
|
||||
const block_compressor& bc, const block_compressor& metadata_bc,
|
||||
size_t max_queue_size)
|
||||
: os_(os)
|
||||
, wg_(wg)
|
||||
, prog_(prog)
|
||||
, bc_(bc)
|
||||
, metadata_bc_(metadata_bc)
|
||||
, max_queue_size_(max_queue_size)
|
||||
, log_(lgr)
|
||||
, flush_(false)
|
||||
@ -219,7 +231,7 @@ void filesystem_writer_<LoggerPolicy>::writer_thread() {
|
||||
<< size_with_unit(fsb->uncompressed_size()) << " to "
|
||||
<< size_with_unit(fsb->size());
|
||||
|
||||
write(fsb->type(), fsb->data());
|
||||
write(fsb->type(), fsb->compression(), fsb->data());
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,10 +275,11 @@ void filesystem_writer_<LoggerPolicy>::write_file_header() {
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
||||
compression_type compression,
|
||||
const std::vector<uint8_t>& data) {
|
||||
section_header sh;
|
||||
sh.type = type;
|
||||
sh.compression = bc_.type();
|
||||
sh.compression = compression;
|
||||
sh.unused = 0;
|
||||
sh.length = data.size();
|
||||
write(sh);
|
||||
@ -279,7 +292,8 @@ void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_section(
|
||||
section_type type, std::vector<uint8_t>&& data) {
|
||||
section_type type, std::vector<uint8_t>&& data,
|
||||
block_compressor const& bc) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mx_);
|
||||
|
||||
@ -288,9 +302,9 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
||||
}
|
||||
}
|
||||
|
||||
auto fsb = std::make_unique<fsblock>(type, std::move(data));
|
||||
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data));
|
||||
|
||||
fsb->compress(wg_, bc_, log_);
|
||||
fsb->compress(wg_, log_);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mx_);
|
||||
@ -303,13 +317,19 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_block(
|
||||
std::vector<uint8_t>&& data) {
|
||||
write_section(section_type::BLOCK, std::move(data));
|
||||
write_section(section_type::BLOCK, std::move(data), bc_);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_metadata(
|
||||
std::vector<uint8_t>&& data) {
|
||||
write_section(section_type::METADATA, std::move(data));
|
||||
write_section(section_type::METADATA, std::move(data), metadata_bc_);
|
||||
}
|
||||
|
||||
// Hands `data` to write_section() as a METADATA_V2 section. The metadata
// compressor (metadata_bc_) is passed along instead of the block
// compressor (bc_), mirroring what write_metadata() does for the v1 section.
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_metadata_v2(
|
||||
std::vector<uint8_t>&& data) {
|
||||
write_section(section_type::METADATA_V2, std::move(data), metadata_bc_);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -333,7 +353,15 @@ filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
|
||||
worker_group& wg, progress& prog,
|
||||
const block_compressor& bc,
|
||||
size_t max_queue_size)
|
||||
: filesystem_writer(os, lgr, wg, prog, bc, bc, max_queue_size) {}
|
||||
|
||||
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
|
||||
worker_group& wg, progress& prog,
|
||||
const block_compressor& bc,
|
||||
const block_compressor& metadata_bc,
|
||||
size_t max_queue_size)
|
||||
: impl_(
|
||||
make_unique_logging_object<impl, filesystem_writer_, logger_policies>(
|
||||
lgr, os, wg, prog, bc, max_queue_size)) {}
|
||||
lgr, os, wg, prog, bc, metadata_bc, max_queue_size)) {}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -39,7 +39,8 @@ const std::map<section_type, std::string> sections{
|
||||
SECTION_TYPE_(META_INODE_INDEX),
|
||||
SECTION_TYPE_(META_CHUNK_INDEX),
|
||||
SECTION_TYPE_(META_DIRECTORIES),
|
||||
SECTION_TYPE_(META_CONFIG)
|
||||
SECTION_TYPE_(META_CONFIG),
|
||||
SECTION_TYPE_(METADATA_V2),
|
||||
#undef SECTION_TYPE_
|
||||
};
|
||||
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "dwarfs/inode_manager.h"
|
||||
#include "dwarfs/script.h"
|
||||
|
||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
template <unsigned BlockSizeBits = 24>
|
||||
@ -76,6 +78,17 @@ class inode_manager_ : public inode_manager {
|
||||
|
||||
const std::vector<chunk_type>& chunks() const override { return chunks_; }
|
||||
|
||||
void
|
||||
append_chunks(std::vector<thrift::metadata::chunk>& vec) const override {
|
||||
for (auto c : chunks_) {
|
||||
thrift::metadata::chunk chnk;
|
||||
chnk.block = access::block(c);
|
||||
chnk.offset = access::offset(c);
|
||||
chnk.size = access::size(c);
|
||||
vec.push_back(chnk);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t num_{std::numeric_limits<uint32_t>::max()};
|
||||
file const* file_{nullptr};
|
||||
|
537
src/dwarfs/metadata_v2.cpp
Normal file
537
src/dwarfs/metadata_v2.cpp
Normal file
@ -0,0 +1,537 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include "dwarfs/metadata_v2.h"
|
||||
|
||||
#include "dwarfs/gen-cpp2/metadata_layouts.h"
|
||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
|
||||
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
|
||||
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
||||
#include <thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
// TODO: merge this into the metadata implementation behind interface
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class metadata_v2_ : public metadata_v2::impl {
|
||||
public:
|
||||
template <typename T>
|
||||
using view = typename ::apache::thrift::frozen::View<T>;
|
||||
using entry_view = view<thrift::metadata::entry>;
|
||||
using directory_view = view<thrift::metadata::directory>;
|
||||
|
||||
metadata_v2_(logger& lgr, std::vector<uint8_t>&& meta,
|
||||
const struct ::stat* /*defaults*/)
|
||||
: data_(std::move(meta))
|
||||
, meta_(::apache::thrift::frozen::mapFrozen<thrift::metadata::metadata>(
|
||||
data_))
|
||||
, root_(meta_.entries()[meta_.inode_index()[0]])
|
||||
, inode_offset_(meta_.chunk_index_offset())
|
||||
, log_(lgr) {
|
||||
// TODO: defaults?
|
||||
log_.debug() << ::apache::thrift::debugString(meta_.thaw());
|
||||
|
||||
::apache::thrift::frozen::Layout<thrift::metadata::metadata> layout;
|
||||
::apache::thrift::frozen::schema::Schema schema;
|
||||
folly::ByteRange range(data_);
|
||||
apache::thrift::CompactSerializer::deserialize(range, schema);
|
||||
log_.debug() << ::apache::thrift::debugString(schema);
|
||||
}
|
||||
|
||||
void dump(std::ostream& os,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb)
|
||||
const override;
|
||||
|
||||
#if 0
|
||||
size_t size() const override { return data_.size(); }
|
||||
|
||||
bool empty() const override { return data_.empty(); }
|
||||
|
||||
size_t block_size() const override {
|
||||
return static_cast<size_t>(1) << cfg_->block_size_bits;
|
||||
}
|
||||
|
||||
unsigned block_size_bits() const override { return cfg_->block_size_bits; }
|
||||
|
||||
void walk(std::function<void(const dir_entry*)> const& func) const override;
|
||||
const dir_entry* find(const char* path) const override;
|
||||
const dir_entry* find(int inode) const override;
|
||||
const dir_entry* find(int inode, const char* name) const override;
|
||||
int getattr(const dir_entry* de, struct ::stat* stbuf) const override;
|
||||
int access(const dir_entry* de, int mode, uid_t uid,
|
||||
gid_t gid) const override;
|
||||
const directory* opendir(const dir_entry* de) const override;
|
||||
const dir_entry*
|
||||
readdir(const directory* d, size_t offset, std::string* name) const override;
|
||||
size_t dirsize(const directory* d) const override {
|
||||
return d->count + 2; // adds '.' and '..', which we fake in ;-)
|
||||
}
|
||||
int readlink(const dir_entry* de, char* buf, size_t size) const override;
|
||||
int readlink(const dir_entry* de, std::string* buf) const override;
|
||||
int statvfs(struct ::statvfs* stbuf) const override;
|
||||
int open(const dir_entry* de) const override;
|
||||
|
||||
const chunk_type* get_chunks(int inode, size_t& num) const override;
|
||||
#endif
|
||||
|
||||
private:
|
||||
void dump(std::ostream& os, const std::string& indent, entry_view entry,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const;
|
||||
void dump(std::ostream& os, const std::string& indent, directory_view dir,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const;
|
||||
|
||||
std::string modestring(uint16_t mode) const;
|
||||
|
||||
size_t reg_filesize(uint32_t inode) const {
|
||||
uint32_t cur = meta_.chunk_index()[inode];
|
||||
uint32_t end = meta_.chunk_index()[inode + 1];
|
||||
size_t size = 0;
|
||||
while (cur < end) {
|
||||
size += meta_.chunks()[cur++].size();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t filesize(entry_view entry, uint16_t mode) const {
|
||||
if (S_ISREG(mode)) {
|
||||
return reg_filesize(entry.inode());
|
||||
} else if (S_ISLNK(mode)) {
|
||||
return meta_.links()[meta_.dir_link_index()[entry.inode()]].size();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void walk(const dir_entry* de,
|
||||
std::function<void(const dir_entry*)> const& func) const;
|
||||
|
||||
std::string name(const dir_entry* de) const {
|
||||
return std::string(as<char>(de->name_offset), de->name_size);
|
||||
}
|
||||
|
||||
size_t linksize(const dir_entry* de) const {
|
||||
return *as<uint16_t>(de->u.offset);
|
||||
}
|
||||
|
||||
std::string linkname(const dir_entry* de) const {
|
||||
size_t offs = de->u.offset;
|
||||
return std::string(as<char>(offs + sizeof(uint16_t)), *as<uint16_t>(offs));
|
||||
}
|
||||
|
||||
const char* linkptr(const dir_entry* de) const {
|
||||
return as<char>(de->u.offset + sizeof(uint16_t));
|
||||
}
|
||||
|
||||
const directory* getdir(const dir_entry* de) const {
|
||||
return as<directory>(de->u.offset);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T* as(size_t offset = 0) const {
|
||||
return reinterpret_cast<const T*>(
|
||||
reinterpret_cast<const char*>(data_.data()) + offset);
|
||||
}
|
||||
|
||||
const dir_entry* get_entry(int inode) const {
|
||||
inode -= inode_offset_;
|
||||
return inode >= 0 && inode < static_cast<int>(cfg_->inode_count)
|
||||
? as<dir_entry>(inode_index_[inode])
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
void parse(const struct ::stat* defaults);
|
||||
|
||||
const uint32_t* chunk_index_ = nullptr;
|
||||
const uint32_t* inode_index_ = nullptr;
|
||||
const dir_entry* root_ = nullptr;
|
||||
const meta_config* cfg_ = nullptr;
|
||||
std::shared_ptr<dir_reader> dir_reader_;
|
||||
#endif
|
||||
std::vector<uint8_t> data_;
|
||||
::apache::thrift::frozen::MappedFrozen<thrift::metadata::metadata> meta_;
|
||||
entry_view root_;
|
||||
const int inode_offset_;
|
||||
log_proxy<LoggerPolicy> log_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_v2_<LoggerPolicy>::dump(
|
||||
std::ostream& os, const std::string& indent, entry_view entry,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const {
|
||||
auto mode = meta_.modes()[entry.mode()];
|
||||
auto inode = entry.inode();
|
||||
|
||||
os << indent << "<inode:" << inode << "> " << modestring(mode);
|
||||
|
||||
if (inode > 0) {
|
||||
os << " " << meta_.names()[entry.name_index()];
|
||||
}
|
||||
|
||||
if (S_ISREG(mode)) {
|
||||
uint32_t cur = meta_.chunk_index()[inode - inode_offset_];
|
||||
uint32_t end = meta_.chunk_index()[inode - inode_offset_ + 1];
|
||||
os << " [" << cur << ", " << end << "]";
|
||||
size_t size = 0;
|
||||
while (cur < end) {
|
||||
size += meta_.chunks()[cur++].size();
|
||||
}
|
||||
os << " " << size << "\n";
|
||||
// os << " " << filesize(entry, mode) << "\n";
|
||||
// icb(indent + " ", de->inode);
|
||||
} else if (S_ISDIR(mode)) {
|
||||
auto dir_index = meta_.dir_link_index()[inode];
|
||||
os << " => "
|
||||
<< "<dir:" << dir_index << ">"
|
||||
<< "\n";
|
||||
dump(os, indent + " ", meta_.directories()[dir_index], std::move(icb));
|
||||
} else if (S_ISLNK(mode)) {
|
||||
os << " -> " << meta_.links()[meta_.dir_link_index()[inode]] << "\n";
|
||||
} else {
|
||||
os << " (unknown type)\n";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_v2_<LoggerPolicy>::dump(
|
||||
std::ostream& os, const std::string& indent, directory_view dir,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const {
|
||||
auto count = dir.entry_count();
|
||||
auto first = dir.first_entry();
|
||||
os << indent << "(" << count << ") entries\n";
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
dump(os, indent, meta_.entries()[first + i], icb);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_v2_<LoggerPolicy>::dump(
|
||||
std::ostream& os,
|
||||
std::function<void(const std::string&, uint32_t)> const& icb) const {
|
||||
dump(os, "", root_, icb);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
std::string metadata_v2_<LoggerPolicy>::modestring(uint16_t mode) const {
|
||||
std::ostringstream oss;
|
||||
|
||||
oss << (mode & S_ISUID ? 'U' : '-');
|
||||
oss << (mode & S_ISGID ? 'G' : '-');
|
||||
oss << (mode & S_ISVTX ? 'S' : '-');
|
||||
oss << (S_ISDIR(mode) ? 'd' : S_ISLNK(mode) ? 'l' : '-');
|
||||
oss << (mode & S_IRUSR ? 'r' : '-');
|
||||
oss << (mode & S_IWUSR ? 'w' : '-');
|
||||
oss << (mode & S_IXUSR ? 'x' : '-');
|
||||
oss << (mode & S_IRGRP ? 'r' : '-');
|
||||
oss << (mode & S_IWGRP ? 'w' : '-');
|
||||
oss << (mode & S_IXGRP ? 'x' : '-');
|
||||
oss << (mode & S_IROTH ? 'r' : '-');
|
||||
oss << (mode & S_IWOTH ? 'w' : '-');
|
||||
oss << (mode & S_IXOTH ? 'x' : '-');
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
#if 0
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_<LoggerPolicy>::parse(const struct ::stat* defaults) {
|
||||
size_t offset = 0;
|
||||
|
||||
while (offset + sizeof(section_header) <= size()) {
|
||||
const section_header* sh = as<section_header>(offset);
|
||||
|
||||
log_.debug() << "section_header@" << offset << " (" << sh->to_string()
|
||||
<< ")";
|
||||
|
||||
offset += sizeof(section_header);
|
||||
|
||||
if (offset + sh->length > size()) {
|
||||
throw std::runtime_error("truncated metadata");
|
||||
}
|
||||
|
||||
if (sh->compression != compression_type::NONE) {
|
||||
throw std::runtime_error("unsupported metadata compression type");
|
||||
}
|
||||
|
||||
switch (sh->type) {
|
||||
case section_type::META_TABLEDATA:
|
||||
case section_type::META_DIRECTORIES:
|
||||
// ok, ignore
|
||||
break;
|
||||
|
||||
case section_type::META_CHUNK_INDEX:
|
||||
chunk_index_ = as<uint32_t>(offset);
|
||||
break;
|
||||
|
||||
case section_type::META_INODE_INDEX:
|
||||
inode_index_ = as<uint32_t>(offset);
|
||||
break;
|
||||
|
||||
case section_type::META_CONFIG:
|
||||
cfg_ = as<meta_config>(offset);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::runtime_error("unknown metadata section");
|
||||
}
|
||||
|
||||
offset += sh->length;
|
||||
}
|
||||
|
||||
// TODO: moar checkz
|
||||
|
||||
if (!cfg_) {
|
||||
throw std::runtime_error("no metadata configuration found");
|
||||
}
|
||||
|
||||
struct ::stat stat_defaults;
|
||||
|
||||
if (defaults) {
|
||||
stat_defaults = *defaults;
|
||||
} else {
|
||||
metadata::get_stat_defaults(&stat_defaults);
|
||||
}
|
||||
|
||||
chunk_index_ -= cfg_->chunk_index_offset;
|
||||
inode_index_ -= cfg_->inode_index_offset;
|
||||
|
||||
root_ = as<dir_entry>(inode_index_[0]);
|
||||
|
||||
dir_reader_ = dir_reader::create(cfg_->de_type, stat_defaults,
|
||||
reinterpret_cast<const char*>(data_.data()),
|
||||
inode_offset_);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_<LoggerPolicy>::walk(
|
||||
const dir_entry* de,
|
||||
std::function<void(const dir_entry*)> const& func) const {
|
||||
func(de);
|
||||
if (S_ISDIR(de->mode)) {
|
||||
auto dir = getdir(de);
|
||||
for (size_t i = 0; i < dir->count; ++i) {
|
||||
walk(dir_reader_->readdir(dir, i), func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_<LoggerPolicy>::walk(
|
||||
std::function<void(const dir_entry*)> const& func) const {
|
||||
walk(root_, func);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const dir_entry* metadata_<LoggerPolicy>::find(const char* path) const {
|
||||
while (*path and *path == '/') {
|
||||
++path;
|
||||
}
|
||||
|
||||
const dir_entry* de = root_;
|
||||
|
||||
while (*path) {
|
||||
const char* next = ::strchr(path, '/');
|
||||
size_t clen = next ? next - path : ::strlen(path);
|
||||
|
||||
de = dir_reader_->find(getdir(de), path, clen);
|
||||
|
||||
if (!de) {
|
||||
break;
|
||||
}
|
||||
|
||||
path = next ? next + 1 : path + clen;
|
||||
}
|
||||
|
||||
return de;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const dir_entry* metadata_<LoggerPolicy>::find(int inode) const {
|
||||
return get_entry(inode);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const dir_entry*
|
||||
metadata_<LoggerPolicy>::find(int inode, const char* name) const {
|
||||
auto de = get_entry(inode);
|
||||
|
||||
if (de) {
|
||||
de = dir_reader_->find(getdir(de), name, ::strlen(name));
|
||||
}
|
||||
|
||||
return de;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::getattr(const dir_entry* de,
|
||||
struct ::stat* stbuf) const {
|
||||
::memset(stbuf, 0, sizeof(*stbuf));
|
||||
dir_reader_->getattr(de, stbuf, filesize(de));
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::access(const dir_entry* de, int mode, uid_t uid,
|
||||
gid_t gid) const {
|
||||
return dir_reader_->access(de, mode, uid, gid);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const directory* metadata_<LoggerPolicy>::opendir(const dir_entry* de) const {
|
||||
if (S_ISDIR(de->mode)) {
|
||||
return getdir(de);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::open(const dir_entry* de) const {
|
||||
if (S_ISREG(de->mode)) {
|
||||
return de->inode;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const dir_entry*
|
||||
metadata_<LoggerPolicy>::readdir(const directory* d, size_t offset,
|
||||
std::string* name) const {
|
||||
const dir_entry* de;
|
||||
|
||||
switch (offset) {
|
||||
case 0:
|
||||
de = as<dir_entry>(d->self);
|
||||
|
||||
if (name) {
|
||||
name->assign(".");
|
||||
}
|
||||
break;
|
||||
|
||||
case 1:
|
||||
de = as<dir_entry>(d->parent);
|
||||
|
||||
if (name) {
|
||||
name->assign("..");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
offset -= 2;
|
||||
|
||||
if (offset < d->count) {
|
||||
de = dir_reader_->readdir(d, offset, name);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return de;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::readlink(const dir_entry* de, char* buf,
|
||||
size_t size) const {
|
||||
if (S_ISLNK(de->mode)) {
|
||||
size_t lsize = linksize(de);
|
||||
|
||||
::memcpy(buf, linkptr(de), std::min(lsize, size));
|
||||
|
||||
if (size > lsize) {
|
||||
buf[lsize] = '\0';
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::readlink(const dir_entry* de,
|
||||
std::string* buf) const {
|
||||
if (S_ISLNK(de->mode)) {
|
||||
size_t lsize = linksize(de);
|
||||
|
||||
buf->assign(linkptr(de), lsize);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
|
||||
::memset(stbuf, 0, sizeof(*stbuf));
|
||||
|
||||
stbuf->f_bsize = 1UL << cfg_->block_size_bits;
|
||||
stbuf->f_frsize = 1UL;
|
||||
stbuf->f_blocks = cfg_->orig_fs_size;
|
||||
stbuf->f_files = cfg_->inode_count;
|
||||
stbuf->f_flag = ST_RDONLY;
|
||||
stbuf->f_namemax = PATH_MAX;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
const chunk_type*
|
||||
metadata_<LoggerPolicy>::get_chunks(int inode, size_t& num) const {
|
||||
inode -= inode_offset_;
|
||||
if (inode < static_cast<int>(cfg_->chunk_index_offset) ||
|
||||
inode >= static_cast<int>(cfg_->inode_count)) {
|
||||
return nullptr;
|
||||
}
|
||||
uint32_t off = chunk_index_[inode];
|
||||
num = (chunk_index_[inode + 1] - off) / sizeof(chunk_type);
|
||||
return as<chunk_type>(off);
|
||||
}
|
||||
|
||||
void metadata::get_stat_defaults(struct ::stat* defaults) {
|
||||
::memset(defaults, 0, sizeof(struct ::stat));
|
||||
defaults->st_uid = ::geteuid();
|
||||
defaults->st_gid = ::getegid();
|
||||
time_t t = ::time(nullptr);
|
||||
defaults->st_atime = t;
|
||||
defaults->st_mtime = t;
|
||||
defaults->st_ctime = t;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Constructs the pimpl: make_unique_logging_object() is asked for a
// metadata_v2_ instantiation (chosen among logger_policies) and receives the
// logger, the raw metadata bytes (moved) and the stat defaults.
// NOTE: the metadata_v2_ constructor currently leaves `defaults` unused.
metadata_v2::metadata_v2(logger& lgr, std::vector<uint8_t>&& data,
|
||||
const struct ::stat* defaults)
|
||||
: impl_(make_unique_logging_object<metadata_v2::impl, metadata_v2_,
|
||||
logger_policies>(lgr, std::move(data),
|
||||
defaults)) {}
|
||||
} // namespace dwarfs
|
@ -58,8 +58,44 @@
|
||||
#include "dwarfs/script.h"
|
||||
#include "dwarfs/util.h"
|
||||
|
||||
#include "dwarfs/gen-cpp2/metadata_layouts.h"
|
||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
||||
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
|
||||
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
|
||||
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
||||
#include <thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace {
|
||||
|
||||
template <class T>
|
||||
std::vector<uint8_t> freeze_to_buffer(const T& x) {
|
||||
using namespace ::apache::thrift::frozen;
|
||||
|
||||
Layout<T> layout;
|
||||
size_t content_size = LayoutRoot::layout(x, layout);
|
||||
|
||||
std::string schema;
|
||||
serializeRootLayout(layout, schema);
|
||||
|
||||
size_t schema_size = schema.size();
|
||||
auto schema_begin = reinterpret_cast<uint8_t const*>(schema.data());
|
||||
std::vector<uint8_t> buffer(schema_begin, schema_begin + schema_size);
|
||||
|
||||
size_t buffer_size = schema_size + content_size;
|
||||
buffer.resize(buffer_size, 0);
|
||||
|
||||
folly::MutableByteRange content_range(&buffer[schema_size], content_size);
|
||||
ByteRangeFreezer::freeze(layout, x, content_range);
|
||||
|
||||
buffer.resize(buffer.size() - content_range.size());
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class scanner_ : public scanner::impl {
|
||||
public:
|
||||
@ -225,16 +261,18 @@ class set_inode_visitor : public entry_visitor {
|
||||
uint32_t inode_no_ = 0;
|
||||
};
|
||||
|
||||
class save_links_visitor : public entry_visitor {
|
||||
class names_and_links_visitor : public entry_visitor {
|
||||
public:
|
||||
save_links_visitor(metadata_writer& mw)
|
||||
: mw_(mw) {}
|
||||
names_and_links_visitor(metadata_writer& mw, global_entry_data& data)
|
||||
: mw_(mw)
|
||||
, data_(data) {}
|
||||
|
||||
void visit(file*) override {
|
||||
// nothing
|
||||
}
|
||||
void visit(file* p) override { data_.add_name(p->name()); }
|
||||
|
||||
void visit(link* p) override {
|
||||
data_.add_name(p->name());
|
||||
data_.add_link(p->linkname());
|
||||
|
||||
const auto& name = p->linkname();
|
||||
auto r = offset_.emplace(name, mw_.offset());
|
||||
if (r.second) {
|
||||
@ -245,19 +283,26 @@ class save_links_visitor : public entry_visitor {
|
||||
p->set_offset(r.first->second);
|
||||
}
|
||||
|
||||
void visit(dir*) override {
|
||||
// nothing
|
||||
void visit(dir* p) override {
|
||||
if (p->has_parent()) {
|
||||
data_.add_name(p->name());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
metadata_writer& mw_;
|
||||
global_entry_data& data_;
|
||||
std::unordered_map<std::string_view, size_t, folly::Hash> offset_;
|
||||
};
|
||||
|
||||
class save_directories_visitor : public entry_visitor {
|
||||
public:
|
||||
save_directories_visitor(metadata_writer& mw, std::vector<uint32_t>& index)
|
||||
save_directories_visitor(metadata_writer& mw, thrift::metadata::metadata& mv2,
|
||||
global_entry_data const& ge_data,
|
||||
std::vector<uint32_t>& index)
|
||||
: mw_(mw)
|
||||
, mv2_(mv2)
|
||||
, ge_data_(ge_data)
|
||||
, cb_([&](const entry* e, size_t offset) {
|
||||
index.at(e->inode_num()) = folly::to<uint32_t>(offset);
|
||||
}) {}
|
||||
@ -271,17 +316,23 @@ class save_directories_visitor : public entry_visitor {
|
||||
}
|
||||
|
||||
void visit(dir* p) override {
|
||||
mv2_.dir_link_index.at(p->inode_num()) = mv2_.directories.size();
|
||||
p->pack(mv2_, ge_data_);
|
||||
|
||||
p->set_offset(mw_.offset());
|
||||
p->pack(mw_.buffer(p->packed_size()), cb_);
|
||||
|
||||
if (!p->has_parent()) {
|
||||
cb_(p, mw_.offset());
|
||||
p->pack_entry(mw_.buffer(p->packed_entry_size()));
|
||||
p->pack_entry(mv2_, ge_data_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
metadata_writer& mw_;
|
||||
thrift::metadata::metadata& mv2_;
|
||||
global_entry_data const& ge_data_;
|
||||
std::function<void(const entry* e, size_t offset)> cb_;
|
||||
};
|
||||
|
||||
@ -382,9 +433,8 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
}
|
||||
|
||||
// now scan all files
|
||||
// TODO: automatically adjust # of worker threads based on load
|
||||
root->walk([&](entry* ep) {
|
||||
wg_.add_job([=, this, &prog] {
|
||||
wg_.add_job([=, &prog] {
|
||||
if (ep->type() == entry::E_FILE) {
|
||||
prog.current.store(ep);
|
||||
ep->scan(*os_, prog);
|
||||
@ -480,13 +530,18 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
log_.info() << "building metadata...";
|
||||
std::vector<uint8_t> metadata_vec;
|
||||
metadata_writer mw(lgr_, metadata_vec);
|
||||
global_entry_data ge_data;
|
||||
thrift::metadata::metadata mv2;
|
||||
mv2.dir_link_index.resize(siv.inode_no());
|
||||
|
||||
wg_.add_job([&] {
|
||||
mw.start_section(section_type::META_TABLEDATA);
|
||||
|
||||
log_.info() << "saving links...";
|
||||
save_links_visitor slv(mw);
|
||||
root->accept(slv);
|
||||
names_and_links_visitor nlv(mw, ge_data);
|
||||
root->accept(nlv);
|
||||
|
||||
ge_data.index();
|
||||
|
||||
log_.debug() << "link data size = " << mw.section_data_size();
|
||||
|
||||
@ -497,6 +552,11 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
|
||||
log_.info() << "updating name offsets...";
|
||||
root->walk([&](entry* ep) {
|
||||
ep->update(ge_data);
|
||||
if (auto lp = dynamic_cast<link*>(ep)) {
|
||||
mv2.dir_link_index.at(ep->inode_num()) =
|
||||
ge_data.get_link_index(lp->linkname());
|
||||
}
|
||||
if (ep->has_parent()) {
|
||||
auto i = name_offset.find(ep->name());
|
||||
if (i == name_offset.end()) {
|
||||
@ -536,23 +596,31 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
log_.debug() << "saved by segmenting: "
|
||||
<< size_with_unit(prog.saved_by_segmentation);
|
||||
|
||||
// mv2.string_table = std::string(
|
||||
// reinterpret_cast<char const*>(mw.section_data()),
|
||||
// mw.section_data_size());
|
||||
|
||||
// TODO: not sure that's actually needed
|
||||
root->set_name(std::string());
|
||||
|
||||
log_.info() << "saving chunks...";
|
||||
std::vector<uint32_t> index;
|
||||
index.resize(im->count() + 1);
|
||||
mv2.chunk_index.resize(im->count() + 1);
|
||||
|
||||
// TODO: we should be able to start this once all blocks have been
|
||||
// submitted for compression
|
||||
mw.align(im->chunk_size());
|
||||
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
|
||||
index.at(ino->num() - siv.inode_no()) = folly::to<uint32_t>(mw.offset());
|
||||
mv2.chunk_index.at(ino->num() - siv.inode_no()) = mv2.chunks.size();
|
||||
mw.write(ino->chunks());
|
||||
ino->append_chunks(mv2.chunks);
|
||||
});
|
||||
|
||||
// insert dummy inode to help determine number of chunks per inode
|
||||
index.at(im->count()) = folly::to<uint32_t>(mw.offset());
|
||||
mv2.chunk_index.at(im->count()) = mv2.chunks.size();
|
||||
|
||||
mw.finish_section();
|
||||
|
||||
@ -568,8 +636,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
|
||||
log_.info() << "saving directories...";
|
||||
index.resize(siv.inode_no() + im->count());
|
||||
mv2.inode_index.resize(siv.inode_no() + im->count());
|
||||
mw.start_section(section_type::META_DIRECTORIES);
|
||||
save_directories_visitor sdv(mw, index);
|
||||
save_directories_visitor sdv(mw, mv2, ge_data, index);
|
||||
root->accept(sdv);
|
||||
mw.finish_section();
|
||||
|
||||
@ -592,8 +661,37 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
mw.finish_section();
|
||||
|
||||
fsw.write_metadata(std::move(metadata_vec));
|
||||
|
||||
mv2.uids = ge_data.get_uids();
|
||||
mv2.gids = ge_data.get_gids();
|
||||
mv2.modes = ge_data.get_modes();
|
||||
mv2.names = ge_data.get_names();
|
||||
mv2.links = ge_data.get_links();
|
||||
mv2.timestamp_base = ge_data.timestamp_base;
|
||||
mv2.chunk_index_offset = siv.inode_no();
|
||||
mv2.total_fs_size = prog.original_size;
|
||||
|
||||
fsw.write_metadata_v2(freeze_to_buffer(mv2));
|
||||
|
||||
fsw.flush();
|
||||
|
||||
// ::apache::thrift::frozen::freezeToFile(mv2, folly::File("metadata.frozen",
|
||||
// O_RDWR | O_CREAT));
|
||||
|
||||
// auto mapping = folly::MemoryMapping("metadata.frozen");
|
||||
|
||||
// ::apache::thrift::frozen::Layout<thrift::metadata::metadata> layout;
|
||||
// ::apache::thrift::frozen::schema::Schema schema;
|
||||
// auto range = mapping.range();
|
||||
// apache::thrift::CompactSerializer::deserialize(range, schema);
|
||||
|
||||
// log_.info() << ::apache::thrift::debugString(schema);
|
||||
|
||||
// auto mapped =
|
||||
// ::apache::thrift::frozen::mapFrozen<thrift::metadata::metadata>(std::move(mapping));
|
||||
|
||||
// log_.info() << ::apache::thrift::debugString(mapped.thaw());
|
||||
|
||||
log_.info() << "compressed " << size_with_unit(prog.original_size) << " to "
|
||||
<< size_with_unit(prog.compressed_size) << " (ratio="
|
||||
<< static_cast<double>(prog.compressed_size) / prog.original_size
|
||||
|
@ -57,7 +57,7 @@ class basic_worker_group : public worker_group::impl, private Policy {
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_workers; ++i) {
|
||||
workers_.emplace_back([=, this] {
|
||||
workers_.emplace_back([=] {
|
||||
folly::setThreadName(folly::to<std::string>(group_name, i + 1));
|
||||
do_work();
|
||||
});
|
||||
|
@ -29,7 +29,7 @@
|
||||
int main(int argc, char** argv) {
|
||||
if (argc == 2 || argc == 3) {
|
||||
try {
|
||||
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::INFO);
|
||||
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::DEBUG);
|
||||
dwarfs::filesystem fs(lgr, std::make_shared<dwarfs::mmap>(argv[1]),
|
||||
dwarfs::block_cache_options());
|
||||
|
||||
@ -48,7 +48,8 @@ int main(int argc, char** argv) {
|
||||
dwarfs::filesystem::identify(
|
||||
lgr, std::make_shared<dwarfs::mmap>(argv[1]), std::cout);
|
||||
// TODO:
|
||||
// fs.dump(std::cout);
|
||||
fs.dump(std::cout);
|
||||
fs.dump_v2(std::cout);
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Error: " << e.what() << std::endl;
|
||||
|
@ -217,7 +217,7 @@ int mkdwarfs(int argc, char** argv) {
|
||||
|
||||
block_manager::config cfg;
|
||||
std::string path, output, window_sizes, memory_limit, script_path,
|
||||
compression, log_level;
|
||||
compression, metadata_compression, log_level;
|
||||
size_t num_workers, max_scanner_workers;
|
||||
bool no_time = false, no_owner = false, recompress = false,
|
||||
no_progress = false;
|
||||
@ -255,6 +255,9 @@ int mkdwarfs(int argc, char** argv) {
|
||||
("compression,C",
|
||||
po::value<std::string>(&compression),
|
||||
"block compression algorithm")
|
||||
("metadata-compression",
|
||||
po::value<std::string>(&metadata_compression),
|
||||
"metadata compression algorithm (default: same as block compression)")
|
||||
("recompress",
|
||||
po::value<bool>(&recompress)->zero_tokens(),
|
||||
"recompress an existing filesystem")
|
||||
@ -363,6 +366,10 @@ int mkdwarfs(int argc, char** argv) {
|
||||
compression = defaults.compression;
|
||||
}
|
||||
|
||||
if (!vm.count("metadata-compression")) {
|
||||
metadata_compression = compression;
|
||||
}
|
||||
|
||||
if (!vm.count("blockhash-window-sizes")) {
|
||||
window_sizes = defaults.window_sizes;
|
||||
}
|
||||
@ -406,8 +413,9 @@ int mkdwarfs(int argc, char** argv) {
|
||||
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });
|
||||
|
||||
block_compressor bc(compression);
|
||||
block_compressor metadata_bc(metadata_compression);
|
||||
std::ofstream ofs(output);
|
||||
filesystem_writer fsw(ofs, lgr, wg_writer, prog, bc, mem_limit);
|
||||
filesystem_writer fsw(ofs, lgr, wg_writer, prog, bc, metadata_bc, mem_limit);
|
||||
|
||||
if (recompress) {
|
||||
auto ti = log.timed_info();
|
||||
|
67
thrift/metadata.thrift
Normal file
67
thrift/metadata.thrift
Normal file
@ -0,0 +1,67 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
namespace cpp2 dwarfs.thrift.metadata
|
||||
|
||||
// Thrift IDL has no unsigned integer types. These typedefs keep the signed
// wire types (i16/i32/i64) but use the `cpp2.type` annotation so the
// generated C++ code uses the named fixed-width unsigned type instead.
typedef i16 (cpp2.type = "uint16_t") UInt16
|
||||
typedef i32 (cpp2.type = "uint32_t") UInt32
|
||||
typedef i64 (cpp2.type = "uint64_t") UInt64
|
||||
|
||||
// A single chunk record, made of three unsigned 32-bit fields.
// NOTE(review): presumably describes `size` bytes starting at `offset`
// inside the block numbered `block` -- confirm against the reader/writer.
struct chunk {
|
||||
1: required UInt32 block,
|
||||
2: required UInt32 offset,
|
||||
3: required UInt32 size,
|
||||
}
|
||||
|
||||
// A directory record: its own inode, its parent's inode, and a
// (first_entry, entry_count) pair.
// NOTE(review): presumably `first_entry`/`entry_count` select a contiguous
// range of `metadata.entries` -- confirm against the metadata_v2 code.
struct directory {
|
||||
1: required UInt32 self_inode,
|
||||
2: required UInt32 parent_inode,
|
||||
3: required UInt32 first_entry,
|
||||
4: required UInt32 entry_count,
|
||||
}
|
||||
|
||||
// A single file system entry: name reference, mode, inode, ownership and
// the three timestamps.
// NOTE(review): `name_index` presumably indexes `metadata.names`; `mode`,
// `owner` and `group` are 16-bit and may be indices into `metadata.modes`,
// `metadata.uids` and `metadata.gids` rather than raw values -- TODO confirm.
// NOTE(review): the timestamps are presumably relative to
// `metadata.timestamp_base` -- TODO confirm.
struct entry {
|
||||
1: required UInt32 name_index,
|
||||
2: required UInt16 mode,
|
||||
3: required UInt32 inode,
|
||||
4: required UInt16 owner,
|
||||
5: required UInt16 group,
|
||||
6: required UInt64 atime,
|
||||
7: required UInt64 mtime,
|
||||
8: required UInt64 ctime,
|
||||
}
|
||||
|
||||
// Top-level metadata record. It aggregates the chunk, directory and entry
// tables together with several index and lookup lists and a few scalar
// values.
// NOTE(review): the exact meaning of the index lists (`chunk_index`,
// `inode_index`, `dir_link_index`) and of `chunk_index_offset` is not
// visible in this file -- see the metadata_v2 implementation before relying
// on them.
// All fields use `,` as the list separator, matching the other structs in
// this file (fields 13 and 14 previously used `;`; both are accepted by the
// Thrift IDL grammar, so the schema is unchanged).
struct metadata {
|
||||
1: required list<chunk> chunks,
|
||||
2: required list<UInt32> chunk_index,
|
||||
3: required list<directory> directories,
|
||||
4: required list<entry> entries,
|
||||
5: required list<UInt32> inode_index,
|
||||
6: required list<UInt32> dir_link_index,
|
||||
// NOTE(review): uids/gids/modes are presumably the lookup tables
// referenced by entry.owner / entry.group / entry.mode -- TODO confirm.
7: required list<UInt16> uids,
|
||||
8: required list<UInt16> gids,
|
||||
9: required list<UInt16> modes,
|
||||
// String tables. NOTE(review): `names` is presumably indexed by
// entry.name_index; `links` presumably holds symlink targets -- TODO confirm.
10: required list<string> names,
|
||||
11: required list<string> links,
|
||||
12: required UInt64 timestamp_base,
|
||||
13: required UInt32 chunk_index_offset,
|
||||
14: required UInt64 total_fs_size,
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user