From 37244274c4ad83d9011943faab29673a481805be Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 8 Dec 2020 13:41:31 +0100 Subject: [PATCH] Add `--time-resolution` option --- doc/mkdwarfs.md | 7 +++++++ include/dwarfs/global_entry_data.h | 2 ++ include/dwarfs/options.h | 1 + src/dwarfs/global_entry_data.cpp | 13 +++++++++---- src/dwarfs/metadata_v2.cpp | 18 +++++++++++++----- src/dwarfs/scanner.cpp | 3 +++ src/mkdwarfs.cpp | 27 ++++++++++++++++++++++++++- thrift/metadata.thrift | 4 ++++ 8 files changed, 65 insertions(+), 10 deletions(-) diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md index eb9e44e6..887209a9 100644 --- a/doc/mkdwarfs.md +++ b/doc/mkdwarfs.md @@ -119,6 +119,13 @@ Most other options are concerned with compression tuning: reduce the size of the file system. You can pass either a unix time stamp or `now`. + * `--time-resolution=`*sec*|`sec`|`min`|`hour`|`day`: + Specify the resolution with which time stamps are stored. By default, + time stamps are stored with second resolution. You can specify "odd" + resolutions as well, e.g. something like 15 second resolution is + entirely possible. Moving from second to minute resolution, for example, + will save roughly 6 bits per file system entry in the metadata block. + * `--keep-all-times`: As of release 0.3.0, by default, `mkdwarfs` will only save the contents of the `mtime` field in order to save metadata space. If you want to save diff --git a/include/dwarfs/global_entry_data.h b/include/dwarfs/global_entry_data.h index e5440051..81a82bb0 100644 --- a/include/dwarfs/global_entry_data.h +++ b/include/dwarfs/global_entry_data.h @@ -93,6 +93,8 @@ class global_entry_data { } } + uint64_t get_time_offset(uint64_t time) const; + std::unordered_map uids_; std::unordered_map gids_; std::unordered_map modes_; diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index e8b7b00e..4a789af1 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -62,6 +62,7 @@ struct scanner_options { std::optional timestamp; bool keep_all_times{false}; bool remove_empty_dirs{false}; + uint32_t time_resolution_sec{1}; inode_options inode; }; diff --git a/src/dwarfs/global_entry_data.cpp b/src/dwarfs/global_entry_data.cpp index f4f08485..7572da37 100644 --- a/src/dwarfs/global_entry_data.cpp +++ b/src/dwarfs/global_entry_data.cpp @@ -61,22 +61,27 @@ void global_entry_data::index(std::unordered_map& map) { from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; }; } +uint64_t global_entry_data::get_time_offset(uint64_t time) const { + return (time - timestamp_base_) / options_.time_resolution_sec; +} + uint64_t global_entry_data::get_mtime_offset(uint64_t time) const { - return !options_.timestamp ? time - timestamp_base_ : UINT64_C(0); + return !options_.timestamp ? get_time_offset(time) : UINT64_C(0); } uint64_t global_entry_data::get_atime_offset(uint64_t time) const { - return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_ + return !options_.timestamp && options_.keep_all_times ? get_time_offset(time) : UINT64_C(0); } uint64_t global_entry_data::get_ctime_offset(uint64_t time) const { - return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_ + return !options_.timestamp && options_.keep_all_times ? get_time_offset(time) : UINT64_C(0); } uint64_t global_entry_data::get_timestamp_base() const { - return options_.timestamp ? *options_.timestamp : timestamp_base_; + return (options_.timestamp ? *options_.timestamp : timestamp_base_) / + options_.time_resolution_sec; } uint16_t global_entry_data::get_uid_index(uint16_t uid) const { diff --git a/src/dwarfs/metadata_v2.cpp b/src/dwarfs/metadata_v2.cpp index 9ba616ef..890f2401 100644 --- a/src/dwarfs/metadata_v2.cpp +++ b/src/dwarfs/metadata_v2.cpp @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -626,6 +627,13 @@ int metadata_::getattr(entry_view entry, auto timebase = meta_.timestamp_base(); auto inode = entry.inode(); bool mtime_only = meta_.options() && meta_.options()->mtime_only(); + uint32_t resolution = 1; + if (meta_.options()) { + if (auto res = meta_.options()->time_resolution_sec()) { + resolution = *res; + assert(resolution > 0); + } + } stbuf->st_mode = mode; @@ -635,11 +643,11 @@ int metadata_::getattr(entry_view entry, stbuf->st_blocks = (stbuf->st_size + 511) / 512; stbuf->st_uid = entry.getuid(); stbuf->st_gid = entry.getgid(); - stbuf->st_mtime = timebase + entry.mtime_offset(); - stbuf->st_atime = - mtime_only ? stbuf->st_mtime : timebase + entry.atime_offset(); - stbuf->st_ctime = - mtime_only ? stbuf->st_mtime : timebase + entry.ctime_offset(); + stbuf->st_mtime = resolution * (timebase + entry.mtime_offset()); + stbuf->st_atime = mtime_only ? stbuf->st_mtime + : resolution * (timebase + entry.atime_offset()); + stbuf->st_ctime = mtime_only ? stbuf->st_mtime + : resolution * (timebase + entry.ctime_offset()); stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode) ? nlinks_.at(inode - chunk_index_offset_) : 1; diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index d8854807..a6c2fdd2 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -562,6 +562,9 @@ void scanner_::scan(filesystem_writer& fsw, thrift::metadata::fs_options fsopts; fsopts.mtime_only = !options_.keep_all_times; + if (options_.time_resolution_sec > 1) { + fsopts.time_resolution_sec_ref() = options_.time_resolution_sec; + } mv2.uids = ge_data.get_uids(); mv2.gids = ge_data.get_gids(); diff --git a/src/mkdwarfs.cpp b/src/mkdwarfs.cpp index 48204762..10b12300 100644 --- a/src/mkdwarfs.cpp +++ b/src/mkdwarfs.cpp @@ -98,6 +98,13 @@ const std::map order_choices{ {"similarity", file_order_mode::SIMILARITY}, {"nilsimsa", file_order_mode::NILSIMSA}}; +const std::map time_resolutions{ + {"sec", 1}, + {"min", 60}, + {"hour", 3600}, + {"day", 86400}, +}; + } // namespace namespace dwarfs { @@ -281,7 +288,8 @@ int mkdwarfs(int argc, char** argv) { block_manager::config cfg; std::string path, output, window_sizes, memory_limit, script_arg, compression, - schema_compression, metadata_compression, log_level, timestamp; + schema_compression, metadata_compression, log_level, timestamp, + time_resolution; size_t num_workers, max_scanner_workers; bool recompress = false, no_progress = false; unsigned level; @@ -292,6 +300,10 @@ int mkdwarfs(int argc, char** argv) { auto order_desc = "file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")"; + auto resolution_desc = "time resolution in seconds or (" + + (from(time_resolutions) | get<0>() | unsplit(", ")) + + ")"; + // clang-format off po::options_description opts("Command line options"); opts.add_options() @@ -340,6 +352,9 @@ int mkdwarfs(int argc, char** argv) { ("keep-all-times", po::value(&options.keep_all_times)->zero_tokens(), "save atime and ctime in addition to mtime") + ("time-resolution", + po::value(&time_resolution)->default_value("sec"), + resolution_desc.c_str()) ("order", po::value(&options.file_order) ->default_value(file_order_mode::SIMILARITY, "similarity"), @@ -541,6 +556,16 @@ int mkdwarfs(int argc, char** argv) { : folly::to(timestamp); } + if (auto it = time_resolutions.find(time_resolution); + it != time_resolutions.end()) { + options.time_resolution_sec = it->second; + } else { + options.time_resolution_sec = folly::to(time_resolution); + if (options.time_resolution_sec == 0) { + throw std::runtime_error("timestamp resolution cannot be 0"); + } + } + log_proxy log(lgr); progress prog([&](const progress& p, bool last) { lgr.update(p, last); }); diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index fe24356e..7ff444e5 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -89,6 +89,10 @@ struct entry { struct fs_options { // file system contains only mtime time stamps 1: required bool mtime_only, + + // time base and offsets are stored with this resolution + // 1 = seconds, 60 = minutes, 3600 = hours, ... + 2: optional UInt32 time_resolution_sec, } struct metadata {