mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-10 13:04:15 -04:00
metadata_v2: v1 removal, part 1
This commit is contained in:
parent
0edd99303a
commit
e3a4f8db09
@ -32,14 +32,7 @@
|
|||||||
|
|
||||||
#include <fuse3/fuse_lowlevel.h>
|
#include <fuse3/fuse_lowlevel.h>
|
||||||
|
|
||||||
#define USE_META_V2
|
|
||||||
|
|
||||||
#ifdef USE_META_V2
|
|
||||||
#include "dwarfs/filesystem_v2.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
#else
|
|
||||||
#include "dwarfs/filesystem.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "dwarfs/metadata_v2.h"
|
#include "dwarfs/metadata_v2.h"
|
||||||
#include "dwarfs/mmap.h"
|
#include "dwarfs/mmap.h"
|
||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
@ -78,16 +71,9 @@ const struct fuse_opt dwarfs_opts[] = {
|
|||||||
DWARFS_OPT("workers=%s", workers_str, 0),
|
DWARFS_OPT("workers=%s", workers_str, 0),
|
||||||
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), FUSE_OPT_END};
|
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), FUSE_OPT_END};
|
||||||
|
|
||||||
#ifdef USE_META_V2
|
|
||||||
using filesystem = filesystem_v2;
|
|
||||||
#define ENTRY_V2(e) (*(e))
|
|
||||||
#else
|
|
||||||
#define ENTRY_V2(e) (e)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
options opts;
|
options opts;
|
||||||
stream_logger s_lgr(std::cerr);
|
stream_logger s_lgr(std::cerr);
|
||||||
std::shared_ptr<filesystem> s_fs;
|
std::shared_ptr<filesystem_v2> s_fs;
|
||||||
|
|
||||||
void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
|
void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
|
||||||
DEBUG_FUNC("")
|
DEBUG_FUNC("")
|
||||||
@ -96,7 +82,7 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
|
|||||||
bco.num_workers = opts.workers;
|
bco.num_workers = opts.workers;
|
||||||
bco.decompress_ratio = opts.decompress_ratio;
|
bco.decompress_ratio = opts.decompress_ratio;
|
||||||
s_fs =
|
s_fs =
|
||||||
std::make_shared<filesystem>(s_lgr, std::make_shared<mmap>(opts.fsimage),
|
std::make_shared<filesystem_v2>(s_lgr, std::make_shared<mmap>(opts.fsimage),
|
||||||
bco, &opts.stat_defaults, FUSE_ROOT_ID);
|
bco, &opts.stat_defaults, FUSE_ROOT_ID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,12 +97,12 @@ void op_lookup(fuse_req_t req, fuse_ino_t parent, const char* name) {
|
|||||||
int err = ENOENT;
|
int err = ENOENT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(parent, name);
|
auto entry = s_fs->find(parent, name);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
struct ::fuse_entry_param e;
|
struct ::fuse_entry_param e;
|
||||||
|
|
||||||
err = s_fs->getattr(ENTRY_V2(de), &e.attr);
|
err = s_fs->getattr(*entry, &e.attr);
|
||||||
|
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
e.generation = 1;
|
e.generation = 1;
|
||||||
@ -147,12 +133,12 @@ void op_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info*) {
|
|||||||
|
|
||||||
// TODO: merge with op_lookup
|
// TODO: merge with op_lookup
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(ino);
|
auto entry = s_fs->find(ino);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
struct ::stat stbuf;
|
struct ::stat stbuf;
|
||||||
|
|
||||||
err = s_fs->getattr(ENTRY_V2(de), &stbuf);
|
err = s_fs->getattr(*entry, &stbuf);
|
||||||
|
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
fuse_reply_attr(req, &stbuf, std::numeric_limits<double>::max());
|
fuse_reply_attr(req, &stbuf, std::numeric_limits<double>::max());
|
||||||
@ -178,11 +164,11 @@ void op_access(fuse_req_t req, fuse_ino_t ino, int mode) {
|
|||||||
|
|
||||||
// TODO: merge with op_lookup
|
// TODO: merge with op_lookup
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(ino);
|
auto entry = s_fs->find(ino);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
auto ctx = fuse_req_ctx(req);
|
auto ctx = fuse_req_ctx(req);
|
||||||
err = s_fs->access(ENTRY_V2(de), mode, ctx->uid, ctx->gid);
|
err = s_fs->access(*entry, mode, ctx->uid, ctx->gid);
|
||||||
}
|
}
|
||||||
} catch (const dwarfs::error& e) {
|
} catch (const dwarfs::error& e) {
|
||||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||||
@ -201,12 +187,12 @@ void op_readlink(fuse_req_t req, fuse_ino_t ino) {
|
|||||||
int err = ENOENT;
|
int err = ENOENT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(ino);
|
auto entry = s_fs->find(ino);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
std::string str;
|
std::string str;
|
||||||
|
|
||||||
err = s_fs->readlink(ENTRY_V2(de), &str);
|
err = s_fs->readlink(*entry, &str);
|
||||||
|
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
fuse_reply_readlink(req, str.c_str());
|
fuse_reply_readlink(req, str.c_str());
|
||||||
@ -231,23 +217,15 @@ void op_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) {
|
|||||||
int err = ENOENT;
|
int err = ENOENT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(ino);
|
auto entry = s_fs->find(ino);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
#ifdef USE_META_V2
|
if (S_ISDIR(entry->mode())) {
|
||||||
if (S_ISDIR(de->mode())) {
|
|
||||||
#else
|
|
||||||
if (S_ISDIR(de->mode)) {
|
|
||||||
#endif
|
|
||||||
err = EISDIR;
|
err = EISDIR;
|
||||||
} else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) {
|
} else if (fi->flags & (O_APPEND | O_CREAT | O_TRUNC)) {
|
||||||
err = EACCES;
|
err = EACCES;
|
||||||
} else {
|
} else {
|
||||||
#ifdef USE_META_V2
|
fi->fh = FUSE_ROOT_ID + entry->inode();
|
||||||
fi->fh = FUSE_ROOT_ID + de->inode();
|
|
||||||
#else
|
|
||||||
fi->fh = reinterpret_cast<intptr_t>(de);
|
|
||||||
#endif
|
|
||||||
fi->keep_cache = 1;
|
fi->keep_cache = 1;
|
||||||
fuse_reply_open(req, fi);
|
fuse_reply_open(req, fi);
|
||||||
return;
|
return;
|
||||||
@ -271,13 +249,7 @@ void op_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
|||||||
int err = ENOENT;
|
int err = ENOENT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
#ifdef USE_META_V2
|
|
||||||
assert(fi->fh == ino);
|
assert(fi->fh == ino);
|
||||||
#else
|
|
||||||
auto de = reinterpret_cast<const dir_entry*>(fi->fh);
|
|
||||||
|
|
||||||
if (de) {
|
|
||||||
#endif
|
|
||||||
iovec_read_buf buf;
|
iovec_read_buf buf;
|
||||||
ssize_t rv = s_fs->readv(ino, buf, size, off);
|
ssize_t rv = s_fs->readv(ino, buf, size, off);
|
||||||
|
|
||||||
@ -291,9 +263,6 @@ void op_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
|||||||
}
|
}
|
||||||
|
|
||||||
err = -rv;
|
err = -rv;
|
||||||
#ifndef USE_META_V2
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
catch (const dwarfs::error& e) {
|
catch (const dwarfs::error& e) {
|
||||||
std::cerr << "ERROR: " << e.what() << std::endl;
|
std::cerr << "ERROR: " << e.what() << std::endl;
|
||||||
@ -314,33 +283,25 @@ void op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
|||||||
int err = ENOENT;
|
int err = ENOENT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto de = s_fs->find(ino);
|
auto dirent = s_fs->find(ino);
|
||||||
|
|
||||||
if (de) {
|
if (dirent) {
|
||||||
auto d = s_fs->opendir(ENTRY_V2(de));
|
auto dir = s_fs->opendir(*dirent);
|
||||||
|
|
||||||
if (d) {
|
if (dir) {
|
||||||
off_t lastoff = s_fs->dirsize(ENTRY_V2(d));
|
off_t lastoff = s_fs->dirsize(*dir);
|
||||||
#ifndef USE_META_V2
|
|
||||||
std::string name;
|
|
||||||
#endif
|
|
||||||
struct stat stbuf;
|
struct stat stbuf;
|
||||||
std::vector<char> buf(size);
|
std::vector<char> buf(size);
|
||||||
size_t written = 0;
|
size_t written = 0;
|
||||||
|
|
||||||
while (off < lastoff) {
|
while (off < lastoff) {
|
||||||
#ifdef USE_META_V2
|
auto res = s_fs->readdir(*dir, off);
|
||||||
auto res = s_fs->readdir(*d, off);
|
|
||||||
assert(res);
|
assert(res);
|
||||||
auto [de2, name_view] = *res;
|
|
||||||
std::string name(name_view);
|
|
||||||
#else
|
|
||||||
auto de2 = s_fs->readdir(d, off, &name);
|
|
||||||
#endif
|
|
||||||
s_fs->getattr(de2, &stbuf);
|
|
||||||
|
|
||||||
/// std::cerr << ">>> " << off << "/" << lastoff << " - " << name << "
|
auto [entry, name_view] = *res;
|
||||||
/// - " << stbuf.st_ino << std::endl;
|
std::string name(name_view);
|
||||||
|
|
||||||
|
s_fs->getattr(entry, &stbuf);
|
||||||
|
|
||||||
size_t needed =
|
size_t needed =
|
||||||
fuse_add_direntry(req, &buf[written], buf.size() - written,
|
fuse_add_direntry(req, &buf[written], buf.size() - written,
|
||||||
|
@ -19,37 +19,22 @@
|
|||||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iostream>
|
|
||||||
#include <map>
|
|
||||||
#include <mutex>
|
|
||||||
#include <set>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/system/system_error.hpp>
|
#include <boost/system/system_error.hpp>
|
||||||
|
|
||||||
#include <folly/Conv.h>
|
|
||||||
#include <folly/String.h>
|
|
||||||
#include <folly/small_vector.h>
|
|
||||||
|
|
||||||
#include <sparsehash/dense_hash_map>
|
|
||||||
#include <sparsehash/dense_hash_set>
|
|
||||||
|
|
||||||
#include "dwarfs/config.h"
|
#include "dwarfs/config.h"
|
||||||
#include "dwarfs/cyclic_hash.h"
|
|
||||||
#include "dwarfs/entry.h"
|
#include "dwarfs/entry.h"
|
||||||
#include "dwarfs/filesystem_writer.h"
|
#include "dwarfs/filesystem_writer.h"
|
||||||
#include "dwarfs/fstypes.h"
|
#include "dwarfs/fstypes.h"
|
||||||
#include "dwarfs/hash_util.h"
|
#include "dwarfs/hash_util.h"
|
||||||
#include "dwarfs/inode_manager.h"
|
#include "dwarfs/inode_manager.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
#include "dwarfs/metadata.h"
|
|
||||||
#include "dwarfs/metadata_v2.h"
|
#include "dwarfs/metadata_v2.h"
|
||||||
#include "dwarfs/metadata_writer.h"
|
#include "dwarfs/metadata_writer.h"
|
||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
@ -59,12 +44,6 @@
|
|||||||
#include "dwarfs/script.h"
|
#include "dwarfs/script.h"
|
||||||
#include "dwarfs/util.h"
|
#include "dwarfs/util.h"
|
||||||
|
|
||||||
#include "dwarfs/gen-cpp2/metadata_layouts.h"
|
|
||||||
#include "dwarfs/gen-cpp2/metadata_types.h"
|
|
||||||
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
|
|
||||||
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
|
||||||
#include <thrift/lib/thrift/gen-cpp2/frozen_types_custom_protocol.h>
|
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -77,28 +56,6 @@ class scanner_ : public scanner::impl {
|
|||||||
void scan(filesystem_writer& fsw, const std::string& path, progress& prog);
|
void scan(filesystem_writer& fsw, const std::string& path, progress& prog);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Key, typename Value, typename HashKey = std::hash<Key>>
|
|
||||||
class fast_hash_map : public google::dense_hash_map<Key, Value, HashKey> {
|
|
||||||
public:
|
|
||||||
fast_hash_map() { this->set_empty_key(Key()); }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, typename HashT = std::hash<T>>
|
|
||||||
class fast_hash_set : public google::dense_hash_set<T, HashT> {
|
|
||||||
public:
|
|
||||||
fast_hash_set() { this->set_empty_key(T()); }
|
|
||||||
};
|
|
||||||
|
|
||||||
// We want these to be ordered
|
|
||||||
// TODO: StringPiece?
|
|
||||||
// TODO: Use dense/unordered maps/sets and sort later?
|
|
||||||
using file_name_table_t =
|
|
||||||
fast_hash_map<size_t, fast_hash_set<std::string_view, folly::Hash>>;
|
|
||||||
|
|
||||||
std::unordered_map<std::string_view, size_t, folly::Hash>
|
|
||||||
compress_names_table(metadata_writer& mw,
|
|
||||||
const file_name_table_t& file_name) const;
|
|
||||||
|
|
||||||
const block_manager::config& cfg_;
|
const block_manager::config& cfg_;
|
||||||
const scanner_options& options_;
|
const scanner_options& options_;
|
||||||
std::shared_ptr<entry_factory> entry_;
|
std::shared_ptr<entry_factory> entry_;
|
||||||
@ -125,94 +82,6 @@ scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
|||||||
, lgr_(lgr)
|
, lgr_(lgr)
|
||||||
, log_(lgr) {}
|
, log_(lgr) {}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
|
||||||
std::unordered_map<std::string_view, size_t, folly::Hash>
|
|
||||||
scanner_<LoggerPolicy>::compress_names_table(
|
|
||||||
metadata_writer& mw, const file_name_table_t& file_name) const {
|
|
||||||
log_.info() << "compressing names table...";
|
|
||||||
auto ti = log_.timed_info();
|
|
||||||
|
|
||||||
google::dense_hash_map<uint32_t, uint32_t> index;
|
|
||||||
using position_vector = folly::small_vector<uint32_t, 4>;
|
|
||||||
std::vector<position_vector> positions;
|
|
||||||
index.set_empty_key(0);
|
|
||||||
uint32_t index_pos = 0;
|
|
||||||
|
|
||||||
std::unordered_map<std::string_view, size_t, folly::Hash> offset;
|
|
||||||
size_t saved = 0;
|
|
||||||
size_t orig_offset = mw.offset();
|
|
||||||
|
|
||||||
std::vector<size_t> sizes(file_name.size());
|
|
||||||
std::transform(file_name.begin(), file_name.end(), sizes.begin(),
|
|
||||||
[](const auto& p) { return p.first; });
|
|
||||||
std::sort(sizes.begin(), sizes.end(), std::greater<size_t>());
|
|
||||||
|
|
||||||
for (auto size : sizes) {
|
|
||||||
auto nsi = file_name.find(size);
|
|
||||||
assert(nsi != file_name.end());
|
|
||||||
std::vector<std::string_view> names(nsi->second.size());
|
|
||||||
std::copy(nsi->second.begin(), nsi->second.end(), names.begin());
|
|
||||||
std::sort(names.begin(), names.end());
|
|
||||||
|
|
||||||
for (auto k : names) {
|
|
||||||
bool found = false;
|
|
||||||
|
|
||||||
if (!index.empty() && k.size() >= sizeof(uint32_t)) {
|
|
||||||
uint32_t key;
|
|
||||||
std::memcpy(&key, k.data(), sizeof(key));
|
|
||||||
auto it = index.find(key);
|
|
||||||
if (it != index.end()) {
|
|
||||||
for (uint32_t pos : positions[it->second]) {
|
|
||||||
if (std::memcmp(mw.section_data() + pos + sizeof(key),
|
|
||||||
k.data() + sizeof(key),
|
|
||||||
k.size() - sizeof(key)) == 0) {
|
|
||||||
offset[k] = mw.section_data_offset() + pos;
|
|
||||||
saved += k.size();
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
auto it = std::search(mw.section_begin(), mw.end(), k.begin(), k.end());
|
|
||||||
|
|
||||||
if (it != mw.end()) {
|
|
||||||
offset[k] = it - mw.begin();
|
|
||||||
saved += k.size();
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found) {
|
|
||||||
offset[k] = mw.offset();
|
|
||||||
mw.write(k);
|
|
||||||
|
|
||||||
if (mw.section_data_size() >= sizeof(uint32_t)) {
|
|
||||||
uint32_t last = mw.section_data_size() - sizeof(uint32_t);
|
|
||||||
while (index_pos <= last) {
|
|
||||||
uint32_t key;
|
|
||||||
std::memcpy(&key, mw.section_data() + index_pos, sizeof(key));
|
|
||||||
auto r = index.insert(std::make_pair(key, positions.size()));
|
|
||||||
uint32_t pos_index;
|
|
||||||
if (r.second) {
|
|
||||||
pos_index = positions.size();
|
|
||||||
positions.resize(pos_index + 1);
|
|
||||||
} else {
|
|
||||||
pos_index = r.first->second;
|
|
||||||
}
|
|
||||||
positions[pos_index].push_back(index_pos++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ti << "names table: " << size_with_unit(mw.offset() - orig_offset) << " ("
|
|
||||||
<< size_with_unit(saved) << " saved)";
|
|
||||||
|
|
||||||
return offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
class dir_set_inode_visitor : public entry_visitor {
|
class dir_set_inode_visitor : public entry_visitor {
|
||||||
public:
|
public:
|
||||||
dir_set_inode_visitor(uint32_t& inode_no)
|
dir_set_inode_visitor(uint32_t& inode_no)
|
||||||
@ -250,24 +119,14 @@ class link_set_inode_visitor : public entry_visitor {
|
|||||||
|
|
||||||
class names_and_links_visitor : public entry_visitor {
|
class names_and_links_visitor : public entry_visitor {
|
||||||
public:
|
public:
|
||||||
names_and_links_visitor(metadata_writer& mw, global_entry_data& data)
|
names_and_links_visitor(global_entry_data& data)
|
||||||
: mw_(mw)
|
: data_(data) {}
|
||||||
, data_(data) {}
|
|
||||||
|
|
||||||
void visit(file* p) override { data_.add_name(p->name()); }
|
void visit(file* p) override { data_.add_name(p->name()); }
|
||||||
|
|
||||||
void visit(link* p) override {
|
void visit(link* p) override {
|
||||||
data_.add_name(p->name());
|
data_.add_name(p->name());
|
||||||
data_.add_link(p->linkname());
|
data_.add_link(p->linkname());
|
||||||
|
|
||||||
const auto& name = p->linkname();
|
|
||||||
auto r = offset_.emplace(name, mw_.offset());
|
|
||||||
if (r.second) {
|
|
||||||
uint16_t len = folly::to<uint16_t>(name.size());
|
|
||||||
mw_.write(len);
|
|
||||||
mw_.write(name);
|
|
||||||
}
|
|
||||||
p->set_offset(r.first->second);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void visit(dir* p) override {
|
void visit(dir* p) override {
|
||||||
@ -277,24 +136,17 @@ class names_and_links_visitor : public entry_visitor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
metadata_writer& mw_;
|
|
||||||
global_entry_data& data_;
|
global_entry_data& data_;
|
||||||
std::unordered_map<std::string_view, size_t, folly::Hash> offset_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class save_directories_visitor : public entry_visitor {
|
class save_directories_visitor : public entry_visitor {
|
||||||
public:
|
public:
|
||||||
save_directories_visitor(metadata_writer& mw, thrift::metadata::metadata& mv2,
|
save_directories_visitor(thrift::metadata::metadata& mv2,
|
||||||
global_entry_data const& ge_data,
|
global_entry_data const& ge_data,
|
||||||
std::vector<uint32_t>& dir_index,
|
std::vector<uint32_t>& dir_index)
|
||||||
std::vector<uint32_t>& index)
|
: mv2_(mv2)
|
||||||
: mw_(mw)
|
|
||||||
, mv2_(mv2)
|
|
||||||
, ge_data_(ge_data)
|
, ge_data_(ge_data)
|
||||||
, dir_index_(dir_index)
|
, dir_index_(dir_index) {}
|
||||||
, cb_([&](const entry* e, size_t offset) {
|
|
||||||
index.at(e->inode_num()) = folly::to<uint32_t>(offset);
|
|
||||||
}) {}
|
|
||||||
|
|
||||||
void visit(file*) override {
|
void visit(file*) override {
|
||||||
// nothing
|
// nothing
|
||||||
@ -308,22 +160,15 @@ class save_directories_visitor : public entry_visitor {
|
|||||||
dir_index_.at(p->inode_num()) = mv2_.directories.size();
|
dir_index_.at(p->inode_num()) = mv2_.directories.size();
|
||||||
p->pack(mv2_, ge_data_);
|
p->pack(mv2_, ge_data_);
|
||||||
|
|
||||||
p->set_offset(mw_.offset());
|
|
||||||
p->pack(mw_.buffer(p->packed_size()), cb_);
|
|
||||||
|
|
||||||
if (!p->has_parent()) {
|
if (!p->has_parent()) {
|
||||||
cb_(p, mw_.offset());
|
|
||||||
p->pack_entry(mw_.buffer(p->packed_entry_size()));
|
|
||||||
p->pack_entry(mv2_, ge_data_);
|
p->pack_entry(mv2_, ge_data_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
metadata_writer& mw_;
|
|
||||||
thrift::metadata::metadata& mv2_;
|
thrift::metadata::metadata& mv2_;
|
||||||
global_entry_data const& ge_data_;
|
global_entry_data const& ge_data_;
|
||||||
std::vector<uint32_t>& dir_index_;
|
std::vector<uint32_t>& dir_index_;
|
||||||
std::function<void(const entry* e, size_t offset)> cb_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -436,20 +281,14 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
|||||||
log_.info() << "waiting for background scanners...";
|
log_.info() << "waiting for background scanners...";
|
||||||
wg_.wait();
|
wg_.wait();
|
||||||
|
|
||||||
size_t total{0};
|
|
||||||
std::unordered_map<std::string_view, std::vector<file*>, folly::Hash>
|
std::unordered_map<std::string_view, std::vector<file*>, folly::Hash>
|
||||||
file_hash;
|
file_hash;
|
||||||
file_name_table_t file_name;
|
|
||||||
|
|
||||||
|
// TODO: turn into visitor?
|
||||||
root->walk([&](entry* ep) {
|
root->walk([&](entry* ep) {
|
||||||
if (auto fp = dynamic_cast<file*>(ep)) {
|
if (auto fp = dynamic_cast<file*>(ep)) {
|
||||||
file_hash[fp->hash()].push_back(fp);
|
file_hash[fp->hash()].push_back(fp);
|
||||||
}
|
}
|
||||||
if (ep->has_parent()) {
|
|
||||||
const std::string& name = ep->name();
|
|
||||||
file_name[name.size()].insert(name);
|
|
||||||
total += name.size();
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
log_.info() << "finding duplicate files...";
|
log_.info() << "finding duplicate files...";
|
||||||
@ -522,31 +361,22 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
|||||||
im->number_inodes(first_file_inode);
|
im->number_inodes(first_file_inode);
|
||||||
|
|
||||||
log_.info() << "building metadata...";
|
log_.info() << "building metadata...";
|
||||||
std::vector<uint8_t> metadata_vec;
|
|
||||||
metadata_writer mw(lgr_, metadata_vec);
|
|
||||||
global_entry_data ge_data(
|
global_entry_data ge_data(
|
||||||
options_.no_time); // TODO: just pass options directly
|
options_.no_time); // TODO: just pass options directly
|
||||||
|
|
||||||
thrift::metadata::metadata mv2;
|
thrift::metadata::metadata mv2;
|
||||||
std::vector<uint32_t> dir_index;
|
std::vector<uint32_t> dir_index;
|
||||||
dir_index.resize(first_link_inode);
|
dir_index.resize(first_link_inode);
|
||||||
mv2.link_index.resize(first_file_inode - first_link_inode);
|
mv2.link_index.resize(first_file_inode - first_link_inode);
|
||||||
|
|
||||||
wg_.add_job([&] {
|
wg_.add_job([&] {
|
||||||
mw.start_section(section_type::META_TABLEDATA);
|
|
||||||
|
|
||||||
log_.info() << "saving links...";
|
log_.info() << "saving links...";
|
||||||
names_and_links_visitor nlv(mw, ge_data);
|
names_and_links_visitor nlv(ge_data);
|
||||||
root->accept(nlv);
|
root->accept(nlv);
|
||||||
|
|
||||||
ge_data.index();
|
ge_data.index();
|
||||||
|
|
||||||
log_.debug() << "link data size = " << mw.section_data_size();
|
|
||||||
|
|
||||||
log_.info() << "saving names...";
|
|
||||||
auto name_offset = compress_names_table(mw, file_name);
|
|
||||||
|
|
||||||
log_.debug() << "name data size = " << mw.section_data_size();
|
|
||||||
|
|
||||||
log_.info() << "updating name offsets...";
|
log_.info() << "updating name offsets...";
|
||||||
root->walk([&](entry* ep) {
|
root->walk([&](entry* ep) {
|
||||||
ep->update(ge_data);
|
ep->update(ge_data);
|
||||||
@ -554,13 +384,6 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
|||||||
mv2.link_index.at(ep->inode_num() - first_link_inode) =
|
mv2.link_index.at(ep->inode_num() - first_link_inode) =
|
||||||
ge_data.get_link_index(lp->linkname());
|
ge_data.get_link_index(lp->linkname());
|
||||||
}
|
}
|
||||||
if (ep->has_parent()) {
|
|
||||||
auto i = name_offset.find(ep->name());
|
|
||||||
if (i == name_offset.end()) {
|
|
||||||
throw std::runtime_error("offset not found for entry name");
|
|
||||||
}
|
|
||||||
ep->set_name_offset(i->second);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -593,74 +416,32 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
|||||||
log_.debug() << "saved by segmenting: "
|
log_.debug() << "saved by segmenting: "
|
||||||
<< size_with_unit(prog.saved_by_segmentation);
|
<< size_with_unit(prog.saved_by_segmentation);
|
||||||
|
|
||||||
// mv2.string_table = std::string(
|
// this is actually needed
|
||||||
// reinterpret_cast<char const*>(mw.section_data()),
|
|
||||||
// mw.section_data_size());
|
|
||||||
|
|
||||||
// TODO: not sure that's actually needed
|
|
||||||
root->set_name(std::string());
|
root->set_name(std::string());
|
||||||
|
|
||||||
log_.info() << "saving chunks...";
|
log_.info() << "saving chunks...";
|
||||||
std::vector<uint32_t> index;
|
|
||||||
index.resize(im->count() + 1);
|
|
||||||
mv2.chunk_index.resize(im->count() + 1);
|
mv2.chunk_index.resize(im->count() + 1);
|
||||||
|
|
||||||
// TODO: we should be able to start this once all blocks have been
|
// TODO: we should be able to start this once all blocks have been
|
||||||
// submitted for compression
|
// submitted for compression
|
||||||
mw.align(im->chunk_size());
|
|
||||||
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
|
im->for_each_inode([&](std::shared_ptr<inode> const& ino) {
|
||||||
index.at(ino->num() - first_file_inode) = folly::to<uint32_t>(mw.offset());
|
|
||||||
mv2.chunk_index.at(ino->num() - first_file_inode) = mv2.chunks.size();
|
mv2.chunk_index.at(ino->num() - first_file_inode) = mv2.chunks.size();
|
||||||
mw.write(ino->chunks());
|
|
||||||
ino->append_chunks(mv2.chunks);
|
ino->append_chunks(mv2.chunks);
|
||||||
});
|
});
|
||||||
|
|
||||||
// insert dummy inode to help determine number of chunks per inode
|
// insert dummy inode to help determine number of chunks per inode
|
||||||
index.at(im->count()) = folly::to<uint32_t>(mw.offset());
|
|
||||||
mv2.chunk_index.at(im->count()) = mv2.chunks.size();
|
mv2.chunk_index.at(im->count()) = mv2.chunks.size();
|
||||||
|
|
||||||
mw.finish_section();
|
|
||||||
|
|
||||||
size_t num_chunks = (index.back() - index.front()) / sizeof(chunk_type);
|
|
||||||
|
|
||||||
log_.debug() << "total number of file inodes: " << im->count();
|
log_.debug() << "total number of file inodes: " << im->count();
|
||||||
log_.debug() << "total number of chunks: " << num_chunks;
|
log_.debug() << "total number of chunks: " << mv2.chunks.size();
|
||||||
|
|
||||||
log_.info() << "saving chunk index...";
|
|
||||||
mw.start_section(section_type::META_CHUNK_INDEX);
|
|
||||||
mw.write(index);
|
|
||||||
mw.finish_section();
|
|
||||||
|
|
||||||
log_.info() << "saving directories...";
|
log_.info() << "saving directories...";
|
||||||
index.resize(first_file_inode + im->count());
|
|
||||||
mv2.entry_index.resize(first_file_inode + im->count());
|
mv2.entry_index.resize(first_file_inode + im->count());
|
||||||
mw.start_section(section_type::META_DIRECTORIES);
|
save_directories_visitor sdv(mv2, ge_data, dir_index);
|
||||||
save_directories_visitor sdv(mw, mv2, ge_data, dir_index, index);
|
|
||||||
root->accept(sdv);
|
root->accept(sdv);
|
||||||
mw.finish_section();
|
|
||||||
|
|
||||||
log_.info() << "saving inode index...";
|
|
||||||
mw.start_section(section_type::META_INODE_INDEX);
|
|
||||||
mw.write(index);
|
|
||||||
mw.finish_section();
|
|
||||||
|
|
||||||
log_.info() << "saving metadata config...";
|
|
||||||
mw.start_section(section_type::META_CONFIG);
|
|
||||||
meta_config mconf;
|
|
||||||
mconf.block_size_bits = folly::to<uint8_t>(im->block_size_bits());
|
|
||||||
mconf.de_type = entry_->de_type();
|
|
||||||
mconf.unused = 0;
|
|
||||||
mconf.inode_count = first_file_inode + im->count();
|
|
||||||
mconf.orig_fs_size = prog.original_size;
|
|
||||||
mconf.chunk_index_offset = first_file_inode;
|
|
||||||
mconf.inode_index_offset = 0;
|
|
||||||
mw.write(mconf);
|
|
||||||
mw.finish_section();
|
|
||||||
|
|
||||||
// TODO: remove all metadata v1 code
|
|
||||||
// fsw.write_metadata(std::move(metadata_vec));
|
|
||||||
|
|
||||||
{
|
{
|
||||||
|
// order directories by inode number
|
||||||
std::vector<thrift::metadata::directory> tmp = std::move(mv2.directories);
|
std::vector<thrift::metadata::directory> tmp = std::move(mv2.directories);
|
||||||
mv2.directories.reserve(tmp.size());
|
mv2.directories.reserve(tmp.size());
|
||||||
for (auto i : dir_index) {
|
for (auto i : dir_index) {
|
||||||
|
@ -22,7 +22,6 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "dwarfs/filesystem.h"
|
|
||||||
#include "dwarfs/filesystem_v2.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
#include "dwarfs/mmap.h"
|
#include "dwarfs/mmap.h"
|
||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
@ -32,26 +31,22 @@ int main(int argc, char** argv) {
|
|||||||
try {
|
try {
|
||||||
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::DEBUG);
|
dwarfs::stream_logger lgr(std::cerr, dwarfs::logger::DEBUG);
|
||||||
auto mm = std::make_shared<dwarfs::mmap>(argv[1]);
|
auto mm = std::make_shared<dwarfs::mmap>(argv[1]);
|
||||||
dwarfs::filesystem fs(lgr, mm, dwarfs::block_cache_options());
|
dwarfs::filesystem_v2 fs(lgr, mm, dwarfs::block_cache_options());
|
||||||
dwarfs::filesystem_v2 fs_v2(lgr, mm, dwarfs::block_cache_options());
|
|
||||||
|
|
||||||
if (argc == 3) {
|
if (argc == 3) {
|
||||||
auto de = fs.find(argv[2]);
|
auto entry = fs.find(argv[2]);
|
||||||
|
|
||||||
if (de) {
|
if (entry) {
|
||||||
struct ::stat stbuf;
|
struct ::stat stbuf;
|
||||||
fs.getattr(de, &stbuf);
|
fs.getattr(*entry, &stbuf);
|
||||||
std::vector<char> data(stbuf.st_size);
|
std::vector<char> data(stbuf.st_size);
|
||||||
fs.read(stbuf.st_ino, &data[0], data.size(), 0);
|
fs.read(stbuf.st_ino, &data[0], data.size(), 0);
|
||||||
std::cout.write(&data[0], data.size());
|
std::cout.write(&data[0], data.size());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO: add more usage options...
|
// TODO: add more usage options...
|
||||||
dwarfs::filesystem::identify(lgr, mm, std::cout);
|
|
||||||
fs.dump(std::cout);
|
|
||||||
|
|
||||||
dwarfs::filesystem_v2::identify(lgr, mm, std::cout);
|
dwarfs::filesystem_v2::identify(lgr, mm, std::cout);
|
||||||
fs_v2.dump(std::cout);
|
fs.dump(std::cout);
|
||||||
}
|
}
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception& e) {
|
||||||
std::cerr << "Error: " << e.what() << std::endl;
|
std::cerr << "Error: " << e.what() << std::endl;
|
||||||
|
@ -47,7 +47,7 @@
|
|||||||
#include "dwarfs/block_manager.h"
|
#include "dwarfs/block_manager.h"
|
||||||
#include "dwarfs/console_writer.h"
|
#include "dwarfs/console_writer.h"
|
||||||
#include "dwarfs/entry.h"
|
#include "dwarfs/entry.h"
|
||||||
#include "dwarfs/filesystem.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
#include "dwarfs/filesystem_writer.h"
|
#include "dwarfs/filesystem_writer.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
#include "dwarfs/lua_script.h"
|
#include "dwarfs/lua_script.h"
|
||||||
@ -458,7 +458,7 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
|
|
||||||
if (recompress) {
|
if (recompress) {
|
||||||
auto ti = log.timed_info();
|
auto ti = log.timed_info();
|
||||||
filesystem::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw);
|
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw);
|
||||||
wg_writer.wait();
|
wg_writer.wait();
|
||||||
ti << "filesystem rewritten";
|
ti << "filesystem rewritten";
|
||||||
} else {
|
} else {
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
|
|
||||||
#include "dwarfs/block_compressor.h"
|
#include "dwarfs/block_compressor.h"
|
||||||
#include "dwarfs/entry.h"
|
#include "dwarfs/entry.h"
|
||||||
#include "dwarfs/filesystem.h"
|
|
||||||
#include "dwarfs/filesystem_v2.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
#include "dwarfs/filesystem_writer.h"
|
#include "dwarfs/filesystem_writer.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
@ -163,31 +162,6 @@ using namespace dwarfs;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
dir_entry const* get_entry(dir_entry const* de) { return de; }
|
|
||||||
|
|
||||||
entry_view get_entry(std::optional<entry_view> entry) { return *entry; }
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void test_created_filesystem(T const& fs) {
|
|
||||||
auto de = fs.find("/foo.pl");
|
|
||||||
struct ::stat st;
|
|
||||||
|
|
||||||
ASSERT_TRUE(de);
|
|
||||||
|
|
||||||
auto entry = get_entry(de);
|
|
||||||
|
|
||||||
EXPECT_EQ(fs.getattr(entry, &st), 0);
|
|
||||||
EXPECT_EQ(st.st_size, 23456);
|
|
||||||
|
|
||||||
int inode = fs.open(entry);
|
|
||||||
EXPECT_GE(inode, 0);
|
|
||||||
|
|
||||||
std::vector<char> buf(st.st_size);
|
|
||||||
ssize_t rv = fs.read(inode, &buf[0], st.st_size, 0);
|
|
||||||
EXPECT_EQ(rv, st.st_size);
|
|
||||||
EXPECT_EQ(std::string(buf.begin(), buf.end()), test::loremipsum(st.st_size));
|
|
||||||
}
|
|
||||||
|
|
||||||
void basic_end_to_end_test(const std::string& compressor,
|
void basic_end_to_end_test(const std::string& compressor,
|
||||||
unsigned block_size_bits, file_order_mode file_order,
|
unsigned block_size_bits, file_order_mode file_order,
|
||||||
bool no_owner, bool no_time) {
|
bool no_owner, bool no_time) {
|
||||||
@ -225,11 +199,23 @@ void basic_end_to_end_test(const std::string& compressor,
|
|||||||
block_cache_options bco;
|
block_cache_options bco;
|
||||||
bco.max_bytes = 1 << 20;
|
bco.max_bytes = 1 << 20;
|
||||||
|
|
||||||
filesystem fs(lgr, mm, bco);
|
filesystem_v2 fs(lgr, mm, bco);
|
||||||
test_created_filesystem(fs);
|
|
||||||
|
|
||||||
filesystem_v2 fs_v2(lgr, mm, bco);
|
auto entry = fs.find("/foo.pl");
|
||||||
test_created_filesystem(fs_v2);
|
struct ::stat st;
|
||||||
|
|
||||||
|
ASSERT_TRUE(entry);
|
||||||
|
|
||||||
|
EXPECT_EQ(fs.getattr(*entry, &st), 0);
|
||||||
|
EXPECT_EQ(st.st_size, 23456);
|
||||||
|
|
||||||
|
int inode = fs.open(*entry);
|
||||||
|
EXPECT_GE(inode, 0);
|
||||||
|
|
||||||
|
std::vector<char> buf(st.st_size);
|
||||||
|
ssize_t rv = fs.read(inode, &buf[0], st.st_size, 0);
|
||||||
|
EXPECT_EQ(rv, st.st_size);
|
||||||
|
EXPECT_EQ(std::string(buf.begin(), buf.end()), test::loremipsum(st.st_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> const compressions{"null",
|
std::vector<std::string> const compressions{"null",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user