Add --time-resolution option

This commit is contained in:
Marcus Holland-Moritz 2020-12-08 13:41:31 +01:00
parent a629dad243
commit 37244274c4
8 changed files with 65 additions and 10 deletions

View File

@ -119,6 +119,13 @@ Most other options are concerned with compression tuning:
reduce the size of the file system. You can pass either a unix time stamp reduce the size of the file system. You can pass either a unix time stamp
or `now`. or `now`.
* `--time-resolution=`*sec*|`sec`|`min`|`hour`|`day`:
Specify the resolution with which time stamps are stored. By default,
time stamps are stored with second resolution. Besides the named
resolutions, you can pass any non-zero number of seconds, so an "odd"
resolution such as 15 seconds is entirely possible. Moving from second
to minute resolution, for example, will save roughly 6 bits per file
system entry in the metadata block.
* `--keep-all-times`: * `--keep-all-times`:
As of release 0.3.0, by default, `mkdwarfs` will only save the contents of As of release 0.3.0, by default, `mkdwarfs` will only save the contents of
the `mtime` field in order to save metadata space. If you want to save the `mtime` field in order to save metadata space. If you want to save

View File

@ -93,6 +93,8 @@ class global_entry_data {
} }
} }
uint64_t get_time_offset(uint64_t time) const;
std::unordered_map<uint16_t, uint16_t> uids_; std::unordered_map<uint16_t, uint16_t> uids_;
std::unordered_map<uint16_t, uint16_t> gids_; std::unordered_map<uint16_t, uint16_t> gids_;
std::unordered_map<uint16_t, uint16_t> modes_; std::unordered_map<uint16_t, uint16_t> modes_;

View File

@ -62,6 +62,7 @@ struct scanner_options {
std::optional<uint64_t> timestamp; std::optional<uint64_t> timestamp;
bool keep_all_times{false}; bool keep_all_times{false};
bool remove_empty_dirs{false}; bool remove_empty_dirs{false};
uint32_t time_resolution_sec{1};
inode_options inode; inode_options inode;
}; };

View File

@ -61,22 +61,27 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; }; from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
} }
// Converts an absolute time stamp into the offset stored for an entry,
// expressed in units of `options_.time_resolution_sec`.
//
// `time` and `timestamp_base_` are quantized separately instead of
// quantizing their difference. get_timestamp_base() reports the base as
// `timestamp_base_ / time_resolution_sec`, so a reader that computes
// `resolution * (base + offset)` gets back `time` rounded down to the
// resolution. Dividing the difference instead can lose up to one extra
// resolution step (e.g. base 59, time 60, resolution 60 would yield 0).
//
// NOTE(review): assumes `time >= timestamp_base_` and a non-zero
// `time_resolution_sec`; neither is checked here -- confirm with callers.
uint64_t global_entry_data::get_time_offset(uint64_t time) const {
  uint64_t const resolution = options_.time_resolution_sec;
  return time / resolution - timestamp_base_ / resolution;
}
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const { uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
return !options_.timestamp ? time - timestamp_base_ : UINT64_C(0); return !options_.timestamp ? get_time_offset(time) : UINT64_C(0);
} }
uint64_t global_entry_data::get_atime_offset(uint64_t time) const { uint64_t global_entry_data::get_atime_offset(uint64_t time) const {
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_ return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
: UINT64_C(0); : UINT64_C(0);
} }
uint64_t global_entry_data::get_ctime_offset(uint64_t time) const { uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_ return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
: UINT64_C(0); : UINT64_C(0);
} }
uint64_t global_entry_data::get_timestamp_base() const { uint64_t global_entry_data::get_timestamp_base() const {
return options_.timestamp ? *options_.timestamp : timestamp_base_; return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
options_.time_resolution_sec;
} }
uint16_t global_entry_data::get_uid_index(uint16_t uid) const { uint16_t global_entry_data::get_uid_index(uint16_t uid) const {

View File

@ -20,6 +20,7 @@
*/ */
#include <algorithm> #include <algorithm>
#include <cassert>
#include <cerrno> #include <cerrno>
#include <climits> #include <climits>
#include <cstring> #include <cstring>
@ -626,6 +627,13 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
auto timebase = meta_.timestamp_base(); auto timebase = meta_.timestamp_base();
auto inode = entry.inode(); auto inode = entry.inode();
bool mtime_only = meta_.options() && meta_.options()->mtime_only(); bool mtime_only = meta_.options() && meta_.options()->mtime_only();
uint32_t resolution = 1;
if (meta_.options()) {
if (auto res = meta_.options()->time_resolution_sec()) {
resolution = *res;
assert(resolution > 0);
}
}
stbuf->st_mode = mode; stbuf->st_mode = mode;
@ -635,11 +643,11 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
stbuf->st_blocks = (stbuf->st_size + 511) / 512; stbuf->st_blocks = (stbuf->st_size + 511) / 512;
stbuf->st_uid = entry.getuid(); stbuf->st_uid = entry.getuid();
stbuf->st_gid = entry.getgid(); stbuf->st_gid = entry.getgid();
stbuf->st_mtime = timebase + entry.mtime_offset(); stbuf->st_mtime = resolution * (timebase + entry.mtime_offset());
stbuf->st_atime = stbuf->st_atime = mtime_only ? stbuf->st_mtime
mtime_only ? stbuf->st_mtime : timebase + entry.atime_offset(); : resolution * (timebase + entry.atime_offset());
stbuf->st_ctime = stbuf->st_ctime = mtime_only ? stbuf->st_mtime
mtime_only ? stbuf->st_mtime : timebase + entry.ctime_offset(); : resolution * (timebase + entry.ctime_offset());
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode) stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
? nlinks_.at(inode - chunk_index_offset_) ? nlinks_.at(inode - chunk_index_offset_)
: 1; : 1;

View File

@ -562,6 +562,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
thrift::metadata::fs_options fsopts; thrift::metadata::fs_options fsopts;
fsopts.mtime_only = !options_.keep_all_times; fsopts.mtime_only = !options_.keep_all_times;
if (options_.time_resolution_sec > 1) {
fsopts.time_resolution_sec_ref() = options_.time_resolution_sec;
}
mv2.uids = ge_data.get_uids(); mv2.uids = ge_data.get_uids();
mv2.gids = ge_data.get_gids(); mv2.gids = ge_data.get_gids();

View File

@ -98,6 +98,13 @@ const std::map<std::string, file_order_mode> order_choices{
{"similarity", file_order_mode::SIMILARITY}, {"similarity", file_order_mode::SIMILARITY},
{"nilsimsa", file_order_mode::NILSIMSA}}; {"nilsimsa", file_order_mode::NILSIMSA}};
// Named time resolutions, each mapped to its length in seconds.
const std::map<std::string, uint32_t> time_resolutions{
    {"sec", 1},
    {"min", 60},
    {"hour", 60 * 60},
    {"day", 24 * 60 * 60},
};
} // namespace } // namespace
namespace dwarfs { namespace dwarfs {
@ -281,7 +288,8 @@ int mkdwarfs(int argc, char** argv) {
block_manager::config cfg; block_manager::config cfg;
std::string path, output, window_sizes, memory_limit, script_arg, compression, std::string path, output, window_sizes, memory_limit, script_arg, compression,
schema_compression, metadata_compression, log_level, timestamp; schema_compression, metadata_compression, log_level, timestamp,
time_resolution;
size_t num_workers, max_scanner_workers; size_t num_workers, max_scanner_workers;
bool recompress = false, no_progress = false; bool recompress = false, no_progress = false;
unsigned level; unsigned level;
@ -292,6 +300,10 @@ int mkdwarfs(int argc, char** argv) {
auto order_desc = auto order_desc =
"file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")"; "file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")";
auto resolution_desc = "time resolution in seconds or (" +
(from(time_resolutions) | get<0>() | unsplit(", ")) +
")";
// clang-format off // clang-format off
po::options_description opts("Command line options"); po::options_description opts("Command line options");
opts.add_options() opts.add_options()
@ -340,6 +352,9 @@ int mkdwarfs(int argc, char** argv) {
("keep-all-times", ("keep-all-times",
po::value<bool>(&options.keep_all_times)->zero_tokens(), po::value<bool>(&options.keep_all_times)->zero_tokens(),
"save atime and ctime in addition to mtime") "save atime and ctime in addition to mtime")
("time-resolution",
po::value<std::string>(&time_resolution)->default_value("sec"),
resolution_desc.c_str())
("order", ("order",
po::value<file_order_mode>(&options.file_order) po::value<file_order_mode>(&options.file_order)
->default_value(file_order_mode::SIMILARITY, "similarity"), ->default_value(file_order_mode::SIMILARITY, "similarity"),
@ -541,6 +556,16 @@ int mkdwarfs(int argc, char** argv) {
: folly::to<uint64_t>(timestamp); : folly::to<uint64_t>(timestamp);
} }
if (auto it = time_resolutions.find(time_resolution);
it != time_resolutions.end()) {
options.time_resolution_sec = it->second;
} else {
options.time_resolution_sec = folly::to<uint32_t>(time_resolution);
if (options.time_resolution_sec == 0) {
throw std::runtime_error("timestamp resolution cannot be 0");
}
}
log_proxy<debug_logger_policy> log(lgr); log_proxy<debug_logger_policy> log(lgr);
progress prog([&](const progress& p, bool last) { lgr.update(p, last); }); progress prog([&](const progress& p, bool last) { lgr.update(p, last); });

View File

@ -89,6 +89,10 @@ struct entry {
struct fs_options { struct fs_options {
// file system contains only mtime time stamps // file system contains only mtime time stamps
1: required bool mtime_only, 1: required bool mtime_only,
// time base and offsets are stored with this resolution
// 1 = seconds, 60 = minutes, 3600 = hours, ...
2: optional UInt32 time_resolution_sec,
} }
struct metadata { struct metadata {