diff --git a/doc/dwarfs.md b/doc/dwarfs.md index bf01549b..c2063060 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -54,6 +54,16 @@ options: try or require `mlock()`ing of the file system metadata into memory. + * `-o enable_nlink`: + Set this option if you want correct hardlink counts for regular + files. If this is not specified, the hardlink count will be 1. + Enabling this will slow down the initialization of the FUSE + driver as the hardlink counts will be determined by a full + file system scan (it only takes about a millisecond to scan + through 100,000 files, so this isn't dramatic). The FUSE driver + will also consume more memory to hold the hardlink count table. + This will be 4 bytes for every regular file inode. + * `-o debuglevel=`*name*: Use this for different levels of verbosity along with either the `-f` or `-d` FUSE options. This can give you some insight diff --git a/include/dwarfs/metadata_v2.h b/include/dwarfs/metadata_v2.h index e5d28fd7..2845ec58 100644 --- a/include/dwarfs/metadata_v2.h +++ b/include/dwarfs/metadata_v2.h @@ -46,6 +46,8 @@ namespace dwarfs { class logger; +struct metadata_options; + namespace thrift::metadata { class metadata; } @@ -55,7 +57,8 @@ class metadata_v2 { metadata_v2() = default; metadata_v2(logger& lgr, folly::ByteRange schema, folly::ByteRange data, - const struct ::stat* defaults = nullptr, int inode_offset = 0); + metadata_options const& options, + struct ::stat const* defaults = nullptr, int inode_offset = 0); metadata_v2& operator=(metadata_v2&&) = default; diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 24736e6a..89ba796b 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -35,9 +35,14 @@ struct block_cache_options { double decompress_ratio{1.0}; }; +struct metadata_options { + bool enable_nlink{false}; +}; + struct filesystem_options { mlock_mode lock_mode{mlock_mode::NONE}; block_cache_options block_cache; + metadata_options metadata; }; enum class file_order_mode 
{ NONE, PATH, SCRIPT, SIMILARITY }; diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp index 2d3b92f2..94d3b098 100644 --- a/src/dwarfs.cpp +++ b/src/dwarfs.cpp @@ -52,6 +52,7 @@ struct options { const char* workers_str; // TODO: const?? -> use string? const char* mlock_str; // TODO: const?? -> use string? const char* decompress_ratio_str; // TODO: const?? -> use string? + int enable_nlink; size_t cachesize; size_t workers; mlock_mode lock_mode; @@ -75,6 +76,7 @@ const struct fuse_opt dwarfs_opts[] = { DWARFS_OPT("workers=%s", workers_str, 0), DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), + DWARFS_OPT("enable_nlink", enable_nlink, 1), FUSE_OPT_END}; options opts; @@ -90,13 +92,14 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) { try { auto ti = log.timed_info(); - filesystem_options options; - options.lock_mode = opts.lock_mode; - options.block_cache.max_bytes = opts.cachesize; - options.block_cache.num_workers = opts.workers; - options.block_cache.decompress_ratio = opts.decompress_ratio; + filesystem_options fsopts; + fsopts.lock_mode = opts.lock_mode; + fsopts.block_cache.max_bytes = opts.cachesize; + fsopts.block_cache.num_workers = opts.workers; + fsopts.block_cache.decompress_ratio = opts.decompress_ratio; + fsopts.metadata.enable_nlink = bool(opts.enable_nlink); s_fs = std::make_shared( - s_lgr, std::make_shared(opts.fsimage), options, + s_lgr, std::make_shared(opts.fsimage), fsopts, &opts.stat_defaults, FUSE_ROOT_ID); ti << "file system initialized"; @@ -388,6 +391,7 @@ void usage(const char* progname) { << " -o workers=NUM number of worker threads (2)\n" << " -o mlock=NAME mlock mode: (none), try, must\n" << " -o decratio=NUM ratio for full decompression (0.8)\n" + << " -o enable_nlink show correct hardlink numbers\n" << " -o debuglevel=NAME error, warn, (info), debug, trace\n" << std::endl; diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index c6cf0721..681bf000 100644 --- 
a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -144,6 +144,7 @@ metadata_v2 make_metadata(logger& lgr, std::shared_ptr mm, section_map const& sections, std::vector& schema_buffer, std::vector& meta_buffer, + const metadata_options& options, const struct ::stat* stat_defaults = nullptr, int inode_offset = 0, bool force_buffers = false, mlock_mode lock_mode = mlock_mode::NONE) { @@ -177,7 +178,7 @@ make_metadata(logger& lgr, std::shared_ptr mm, return metadata_v2( lgr, get_section_data(mm, schema_it->second, schema_buffer, force_buffers), - meta_section, stat_defaults, inode_offset); + meta_section, options, stat_defaults, inode_offset); } template @@ -243,7 +244,8 @@ filesystem_::filesystem_(logger& lgr, std::shared_ptr mm, std::vector schema_buffer; meta_ = make_metadata(lgr, mm_, sections, schema_buffer, meta_buffer_, - stat_defaults, inode_offset, false, options.lock_mode); + options.metadata, stat_defaults, inode_offset, false, + options.lock_mode); log_.debug() << "read " << cache.block_count() << " blocks and " << meta_.size() << " bytes of metadata"; @@ -393,8 +395,8 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog, std::vector schema_raw; std::vector meta_raw; - auto meta = - make_metadata(lgr, mm, sections, schema_raw, meta_raw, nullptr, 0, true); + auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw, + metadata_options(), nullptr, 0, true); struct ::statvfs stbuf; meta.statvfs(&stbuf); diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index bb362010..2d5db9bb 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -38,6 +38,7 @@ #include "dwarfs/logger.h" #include "dwarfs/metadata_v2.h" +#include "dwarfs/options.h" #include "dwarfs/gen-cpp2/metadata_layouts.h" #include "dwarfs/gen-cpp2/metadata_types_custom_protocol.h" @@ -99,7 +100,8 @@ class metadata_ : public metadata_v2::impl { public: // TODO: defaults?, remove metadata_(logger& lgr, folly::ByteRange schema, 
folly::ByteRange data, - const struct ::stat* /*defaults*/, int inode_offset) + metadata_options const& options, struct ::stat const* /*defaults*/, + int inode_offset) : data_(data) , meta_(map_frozen(schema, data_)) , root_(meta_.entries()[meta_.entry_index()[0]], &meta_) @@ -107,7 +109,9 @@ class metadata_ : public metadata_v2::impl { , inode_offset_(inode_offset) , link_index_offset_(find_index_offset(inode_rank::INO_LNK)) , chunk_index_offset_(find_index_offset(inode_rank::INO_REG)) - , dev_index_offset_(find_index_offset(inode_rank::INO_DEV)) { + , dev_index_offset_(find_index_offset(inode_rank::INO_DEV)) + /* build_nlinks() reads chunk_index_offset_ and dev_index_offset_, so it is listed after them */ , nlinks_(build_nlinks(options)) + , options_(options) { log_.debug() << "link index offset: " << link_index_offset_; log_.debug() << "chunk index offset: " << chunk_index_offset_; log_.debug() << "device index offset: " << dev_index_offset_; @@ -285,6 +289,28 @@ class metadata_ : public metadata_v2::impl { return 0; } + /* Builds the hardlink count table. Returns an empty vector unless options.enable_nlink is set. Otherwise the table has one counter per inode number in [chunk_index_offset_, dev_index_offset_) (presumably the regular-file inodes, given inode_rank::INO_REG -- confirm), indexed by inode - chunk_index_offset_, and each counter holds the number of entries in meta_.entries() that refer to that inode. */ std::vector build_nlinks(metadata_options const& options) const { + std::vector nlinks; + + if (options.enable_nlink) { + auto ti = log_.timed_debug(); + + nlinks.resize(dev_index_offset_ - chunk_index_offset_); + + /* single pass over all entries; inodes outside the table's range are ignored */ for (auto e : meta_.entries()) { + auto index = int(e.inode()) - chunk_index_offset_; + if (index >= 0 && index < int(nlinks.size())) { + ++nlinks.at(index); + } + } + + /* the logged byte count is derived from capacity(), not size() */ ti << "build hardlink table (" << sizeof(uint32_t) * nlinks.capacity() + << " bytes)"; + } + + return nlinks; + } + folly::ByteRange data_; MappedFrozen meta_; entry_view root_; @@ -293,6 +319,8 @@ class metadata_ : public metadata_v2::impl { const int link_index_offset_; const int chunk_index_offset_; const int dev_index_offset_; + /* result of build_nlinks(); empty unless enable_nlink was set */ const std::vector nlinks_; + /* copy of the options passed to the constructor */ const metadata_options options_; }; template @@ -488,7 +516,9 @@ int metadata_::getattr(entry_view entry, auto inode = entry.inode(); stbuf->st_mode = mode; - stbuf->st_size = file_size(entry, mode); + + stbuf->st_size = S_ISDIR(mode) ? 
make_directory_view(entry).entry_count() + : file_size(entry, mode); stbuf->st_ino = inode + inode_offset_; stbuf->st_blocks = (stbuf->st_size + 511) / 512; stbuf->st_uid = entry.getuid(); @@ -496,6 +526,9 @@ int metadata_::getattr(entry_view entry, stbuf->st_atime = timebase + entry.atime_offset(); stbuf->st_mtime = timebase + entry.mtime_offset(); stbuf->st_ctime = timebase + entry.ctime_offset(); + stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode) + ? nlinks_.at(inode - chunk_index_offset_) + : 1; if (S_ISBLK(mode) || S_ISCHR(mode)) { stbuf->st_rdev = get_device_id(inode); @@ -649,10 +682,10 @@ metadata_v2::freeze(const thrift::metadata::metadata& data) { } metadata_v2::metadata_v2(logger& lgr, folly::ByteRange schema, - folly::ByteRange data, const struct ::stat* defaults, - int inode_offset) + folly::ByteRange data, metadata_options const& options, + struct ::stat const* defaults, int inode_offset) : impl_(make_unique_logging_object( - lgr, schema, data, defaults, inode_offset)) {} + lgr, schema, data, options, defaults, inode_offset)) {} } // namespace dwarfs