Add support for correct hardlink counts

This commit is contained in:
Marcus Holland-Moritz 2020-12-02 15:16:38 +01:00
parent 0a248ecbf6
commit c59eea1517
6 changed files with 74 additions and 17 deletions

View File

@ -54,6 +54,16 @@ options:
try or require `mlock()`ing of the file system metadata into
memory.
* `-o enable_nlink`:
Set this option if you want correct hardlink counts for regular
files. If this is not specified, the hardlink count will be 1.
Enabling this will slow down the initialization of the fuse
driver as the hardlink counts will be determined by a full
file system scan (it only takes about a millisecond to scan
through 100,000 files, so this isn't dramatic). The fuse driver
will also consume more memory to hold the hardlink count table.
This will be 4 bytes for every regular file inode.
* `-o debuglevel=`*name*:
Use this for different levels of verbosity along with either
the `-f` or `-d` FUSE options. This can give you some insight

View File

@ -46,6 +46,8 @@ namespace dwarfs {
class logger;
struct metadata_options;
namespace thrift::metadata {
class metadata;
}
@ -55,7 +57,8 @@ class metadata_v2 {
metadata_v2() = default;
metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
const struct ::stat* defaults = nullptr, int inode_offset = 0);
metadata_options const& options,
struct ::stat const* defaults = nullptr, int inode_offset = 0);
metadata_v2& operator=(metadata_v2&&) = default;

View File

@ -35,9 +35,14 @@ struct block_cache_options {
double decompress_ratio{1.0};
};
// Options that influence how file system metadata is exposed.
struct metadata_options {
  // When set, the metadata layer builds a hardlink count table and reports
  // real link counts for regular files; when unset, the link count is
  // reported as 1. Off by default.
  bool enable_nlink = false;
};
// Aggregate of all settings passed when a file system image is opened.
struct filesystem_options {
  // Memory locking mode; defaults to no locking.
  mlock_mode lock_mode = mlock_mode::NONE;
  // Settings for the block cache.
  block_cache_options block_cache;
  // Settings for the metadata layer (e.g. hardlink counting).
  metadata_options metadata;
};
// Available file ordering modes.
// NOTE(review): presumably selects how files are ordered when an image is
// built - confirm against the code that consumes this enum.
enum class file_order_mode {
  NONE,
  PATH,
  SCRIPT,
  SIMILARITY
};

View File

@ -52,6 +52,7 @@ struct options {
const char* workers_str; // TODO: const?? -> use string?
const char* mlock_str; // TODO: const?? -> use string?
const char* decompress_ratio_str; // TODO: const?? -> use string?
int enable_nlink;
size_t cachesize;
size_t workers;
mlock_mode lock_mode;
@ -75,6 +76,7 @@ const struct fuse_opt dwarfs_opts[] = {
DWARFS_OPT("workers=%s", workers_str, 0),
DWARFS_OPT("mlock=%s", mlock_str, 0),
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1),
FUSE_OPT_END};
options opts;
@ -90,13 +92,14 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
try {
auto ti = log.timed_info();
filesystem_options options;
options.lock_mode = opts.lock_mode;
options.block_cache.max_bytes = opts.cachesize;
options.block_cache.num_workers = opts.workers;
options.block_cache.decompress_ratio = opts.decompress_ratio;
filesystem_options fsopts;
fsopts.lock_mode = opts.lock_mode;
fsopts.block_cache.max_bytes = opts.cachesize;
fsopts.block_cache.num_workers = opts.workers;
fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
s_fs = std::make_shared<filesystem_v2>(
s_lgr, std::make_shared<mmap>(opts.fsimage), options,
s_lgr, std::make_shared<mmap>(opts.fsimage), fsopts,
&opts.stat_defaults, FUSE_ROOT_ID);
ti << "file system initialized";
@ -388,6 +391,7 @@ void usage(const char* progname) {
<< " -o workers=NUM number of worker threads (2)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o enable_nlink show correct hardlink numbers\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n"
<< std::endl;

View File

@ -144,6 +144,7 @@ metadata_v2
make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
section_map const& sections, std::vector<uint8_t>& schema_buffer,
std::vector<uint8_t>& meta_buffer,
const metadata_options& options,
const struct ::stat* stat_defaults = nullptr,
int inode_offset = 0, bool force_buffers = false,
mlock_mode lock_mode = mlock_mode::NONE) {
@ -177,7 +178,7 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
return metadata_v2(
lgr,
get_section_data(mm, schema_it->second, schema_buffer, force_buffers),
meta_section, stat_defaults, inode_offset);
meta_section, options, stat_defaults, inode_offset);
}
template <typename LoggerPolicy>
@ -243,7 +244,8 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
std::vector<uint8_t> schema_buffer;
meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_,
stat_defaults, inode_offset, false, options.lock_mode);
options.metadata, stat_defaults, inode_offset, false,
options.lock_mode);
log_.debug() << "read " << cache.block_count() << " blocks and "
<< meta_.size() << " bytes of metadata";
@ -393,8 +395,8 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
std::vector<uint8_t> schema_raw;
std::vector<uint8_t> meta_raw;
auto meta =
make_metadata(lgr, mm, sections, schema_raw, meta_raw, nullptr, 0, true);
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
metadata_options(), nullptr, 0, true);
struct ::statvfs stbuf;
meta.statvfs(&stbuf);

View File

@ -38,6 +38,7 @@
#include "dwarfs/logger.h"
#include "dwarfs/metadata_v2.h"
#include "dwarfs/options.h"
#include "dwarfs/gen-cpp2/metadata_layouts.h"
#include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h"
@ -99,7 +100,8 @@ class metadata_ : public metadata_v2::impl {
public:
// TODO: defaults?, remove
metadata_(logger& lgr, folly::ByteRange schema, folly::ByteRange data,
const struct ::stat* /*defaults*/, int inode_offset)
metadata_options const& options, struct ::stat const* /*defaults*/,
int inode_offset)
: data_(data)
, meta_(map_frozen<thrift::metadata::metadata>(schema, data_))
, root_(meta_.entries()[meta_.entry_index()[0]], &meta_)
@ -107,7 +109,9 @@ class metadata_ : public metadata_v2::impl {
, inode_offset_(inode_offset)
, link_index_offset_(find_index_offset(inode_rank::INO_LNK))
, chunk_index_offset_(find_index_offset(inode_rank::INO_REG))
, dev_index_offset_(find_index_offset(inode_rank::INO_DEV)) {
, dev_index_offset_(find_index_offset(inode_rank::INO_DEV))
, nlinks_(build_nlinks(options))
, options_(options) {
log_.debug() << "link index offset: " << link_index_offset_;
log_.debug() << "chunk index offset: " << chunk_index_offset_;
log_.debug() << "device index offset: " << dev_index_offset_;
@ -285,6 +289,28 @@ class metadata_ : public metadata_v2::impl {
return 0;
}
// Builds the hardlink count table used when `enable_nlink` is set.
//
// The table has one slot per inode in the range
// [chunk_index_offset_, dev_index_offset_); each slot holds the number of
// entries in the metadata that refer to that inode. Entries whose inode
// falls outside that range are ignored.
//
// Returns an empty vector when `options.enable_nlink` is false.
std::vector<uint32_t> build_nlinks(metadata_options const& options) const {
  std::vector<uint32_t> table;

  if (options.enable_nlink) {
    // Reports the time taken (at debug level) together with the message
    // streamed into it below.
    auto timer = log_.timed_debug();

    table.resize(dev_index_offset_ - chunk_index_offset_);
    int const slot_count = int(table.size());

    for (auto entry : meta_.entries()) {
      int const slot = int(entry.inode()) - chunk_index_offset_;

      // Skip inodes that are not covered by the table.
      if (slot < 0 || slot >= slot_count) {
        continue;
      }

      ++table.at(slot);
    }

    timer << "build hardlink table (" << sizeof(uint32_t) * table.capacity()
          << " bytes)";
  }

  return table;
}
folly::ByteRange data_;
MappedFrozen<thrift::metadata::metadata> meta_;
entry_view root_;
@ -293,6 +319,8 @@ class metadata_ : public metadata_v2::impl {
const int link_index_offset_;
const int chunk_index_offset_;
const int dev_index_offset_;
const std::vector<uint32_t> nlinks_;
const metadata_options options_;
};
template <typename LoggerPolicy>
@ -488,7 +516,9 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
auto inode = entry.inode();
stbuf->st_mode = mode;
stbuf->st_size = file_size(entry, mode);
stbuf->st_size = S_ISDIR(mode) ? make_directory_view(entry).entry_count()
: file_size(entry, mode);
stbuf->st_ino = inode + inode_offset_;
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
stbuf->st_uid = entry.getuid();
@ -496,6 +526,9 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
stbuf->st_atime = timebase + entry.atime_offset();
stbuf->st_mtime = timebase + entry.mtime_offset();
stbuf->st_ctime = timebase + entry.ctime_offset();
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
? nlinks_.at(inode - chunk_index_offset_)
: 1;
if (S_ISBLK(mode) || S_ISCHR(mode)) {
stbuf->st_rdev = get_device_id(inode);
@ -649,10 +682,10 @@ metadata_v2::freeze(const thrift::metadata::metadata& data) {
}
metadata_v2::metadata_v2(logger& lgr, folly::ByteRange schema,
folly::ByteRange data, const struct ::stat* defaults,
int inode_offset)
folly::ByteRange data, metadata_options const& options,
struct ::stat const* defaults, int inode_offset)
: impl_(make_unique_logging_object<metadata_v2::impl, metadata_,
logger_policies>(
lgr, schema, data, defaults, inode_offset)) {}
lgr, schema, data, options, defaults, inode_offset)) {}
} // namespace dwarfs