Add --time-resolution option

This commit is contained in:
Marcus Holland-Moritz 2020-12-08 13:41:31 +01:00
parent a629dad243
commit 37244274c4
8 changed files with 65 additions and 10 deletions

View File

@ -119,6 +119,13 @@ Most other options are concerned with compression tuning:
reduce the size of the file system. You can pass either a unix time stamp
or `now`.
* `--time-resolution=`*sec*|`sec`|`min`|`hour`|`day`:
Specify the resolution with which time stamps are stored. By default,
time stamps are stored with second resolution. You can pass either one
of the named resolutions or an arbitrary number of seconds, so "odd"
resolutions such as 15 seconds are entirely possible. Moving from second
to minute resolution, for example, will save roughly 6 bits per file
system entry in the metadata block.
* `--keep-all-times`:
As of release 0.3.0, by default, `mkdwarfs` will only save the contents of
the `mtime` field in order to save metadata space. If you want to save

View File

@ -93,6 +93,8 @@ class global_entry_data {
}
}
// Converts the absolute time stamp `time` into an offset from
// `timestamp_base_`, expressed in units of `options_.time_resolution_sec`.
uint64_t get_time_offset(uint64_t time) const;
std::unordered_map<uint16_t, uint16_t> uids_;
std::unordered_map<uint16_t, uint16_t> gids_;
std::unordered_map<uint16_t, uint16_t> modes_;

View File

@ -62,6 +62,7 @@ struct scanner_options {
std::optional<uint64_t> timestamp;
bool keep_all_times{false};
bool remove_empty_dirs{false};
// Resolution, in seconds, with which time stamps are stored (1 = seconds).
uint32_t time_resolution_sec{1};
inode_options inode;
};

View File

@ -61,22 +61,27 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
}
// Maps an absolute time stamp to the offset that gets stored for it: the
// distance from `timestamp_base_`, scaled down to the configured resolution.
uint64_t global_entry_data::get_time_offset(uint64_t time) const {
  // Distance from the base time stamp.
  auto const delta = time - timestamp_base_;
  // Integer division: any remainder below one resolution step is dropped.
  return delta / options_.time_resolution_sec;
}
// Returns the offset to store for an entry's mtime value `time`.
// When `options_.timestamp` is set the offset is always 0; otherwise it is
// derived from `time` relative to `timestamp_base_`.
// NOTE(review): two return statements are visible here. The first subtracts
// the base directly, the second delegates to get_time_offset(), which also
// applies the time resolution. Presumably the first is the line this change
// removes — confirm against the actual file.
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
return !options_.timestamp ? time - timestamp_base_ : UINT64_C(0);
return !options_.timestamp ? get_time_offset(time) : UINT64_C(0);
}
// Returns the offset to store for an entry's atime value `time`.
// A non-zero offset is only produced when no `options_.timestamp` is set
// and `options_.keep_all_times` is enabled; in every other case the result
// is 0.
// NOTE(review): the two `return` lines are the old and new first half of the
// same conditional expression (direct subtraction vs. get_time_offset());
// presumably only the get_time_offset() variant remains in the actual file —
// confirm.
uint64_t global_entry_data::get_atime_offset(uint64_t time) const {
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
: UINT64_C(0);
}
// Returns the offset to store for an entry's ctime value `time`.
// A non-zero offset is only produced when no `options_.timestamp` is set
// and `options_.keep_all_times` is enabled; in every other case the result
// is 0.
// NOTE(review): the two `return` lines are the old and new first half of the
// same conditional expression (direct subtraction vs. get_time_offset());
// presumably only the get_time_offset() variant remains in the actual file —
// confirm.
uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
: UINT64_C(0);
}
// Returns the time stamp base: `*options_.timestamp` when that override is
// set, otherwise `timestamp_base_`.
// NOTE(review): two return statements are visible here. The second one
// additionally divides by `options_.time_resolution_sec`, so the base is
// expressed in resolution units like the offsets from get_time_offset().
// Presumably the first is the line this change removes — confirm.
// NOTE(review): base and offsets are truncated independently by integer
// division, so `resolution * (base + offset)` can lag the original time
// stamp by the sum of both remainders — worth confirming this is acceptable.
uint64_t global_entry_data::get_timestamp_base() const {
return options_.timestamp ? *options_.timestamp : timestamp_base_;
return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
options_.time_resolution_sec;
}
uint16_t global_entry_data::get_uid_index(uint16_t uid) const {

View File

@ -20,6 +20,7 @@
*/
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <climits>
#include <cstring>
@ -626,6 +627,13 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
auto timebase = meta_.timestamp_base();
auto inode = entry.inode();
bool mtime_only = meta_.options() && meta_.options()->mtime_only();
uint32_t resolution = 1;
if (meta_.options()) {
if (auto res = meta_.options()->time_resolution_sec()) {
resolution = *res;
assert(resolution > 0);
}
}
stbuf->st_mode = mode;
@ -635,11 +643,11 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
stbuf->st_uid = entry.getuid();
stbuf->st_gid = entry.getgid();
stbuf->st_mtime = timebase + entry.mtime_offset();
stbuf->st_atime =
mtime_only ? stbuf->st_mtime : timebase + entry.atime_offset();
stbuf->st_ctime =
mtime_only ? stbuf->st_mtime : timebase + entry.ctime_offset();
stbuf->st_mtime = resolution * (timebase + entry.mtime_offset());
stbuf->st_atime = mtime_only ? stbuf->st_mtime
: resolution * (timebase + entry.atime_offset());
stbuf->st_ctime = mtime_only ? stbuf->st_mtime
: resolution * (timebase + entry.ctime_offset());
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
? nlinks_.at(inode - chunk_index_offset_)
: 1;

View File

@ -562,6 +562,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
thrift::metadata::fs_options fsopts;
fsopts.mtime_only = !options_.keep_all_times;
if (options_.time_resolution_sec > 1) {
fsopts.time_resolution_sec_ref() = options_.time_resolution_sec;
}
mv2.uids = ge_data.get_uids();
mv2.gids = ge_data.get_gids();

View File

@ -98,6 +98,13 @@ const std::map<std::string, file_order_mode> order_choices{
{"similarity", file_order_mode::SIMILARITY},
{"nilsimsa", file_order_mode::NILSIMSA}};
// Named time stamp resolutions understood by `--time-resolution`, each mapped
// to its length in seconds.
const std::map<std::string, uint32_t> time_resolutions{
    {"day", 24 * 60 * 60},
    {"hour", 60 * 60},
    {"min", 60},
    {"sec", 1},
};
} // namespace
namespace dwarfs {
@ -281,7 +288,8 @@ int mkdwarfs(int argc, char** argv) {
block_manager::config cfg;
std::string path, output, window_sizes, memory_limit, script_arg, compression,
schema_compression, metadata_compression, log_level, timestamp;
schema_compression, metadata_compression, log_level, timestamp,
time_resolution;
size_t num_workers, max_scanner_workers;
bool recompress = false, no_progress = false;
unsigned level;
@ -292,6 +300,10 @@ int mkdwarfs(int argc, char** argv) {
auto order_desc =
"file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")";
auto resolution_desc = "time resolution in seconds or (" +
(from(time_resolutions) | get<0>() | unsplit(", ")) +
")";
// clang-format off
po::options_description opts("Command line options");
opts.add_options()
@ -340,6 +352,9 @@ int mkdwarfs(int argc, char** argv) {
("keep-all-times",
po::value<bool>(&options.keep_all_times)->zero_tokens(),
"save atime and ctime in addition to mtime")
("time-resolution",
po::value<std::string>(&time_resolution)->default_value("sec"),
resolution_desc.c_str())
("order",
po::value<file_order_mode>(&options.file_order)
->default_value(file_order_mode::SIMILARITY, "similarity"),
@ -541,6 +556,16 @@ int mkdwarfs(int argc, char** argv) {
: folly::to<uint64_t>(timestamp);
}
if (auto it = time_resolutions.find(time_resolution);
it != time_resolutions.end()) {
options.time_resolution_sec = it->second;
} else {
options.time_resolution_sec = folly::to<uint32_t>(time_resolution);
if (options.time_resolution_sec == 0) {
throw std::runtime_error("timestamp resolution cannot be 0");
}
}
log_proxy<debug_logger_policy> log(lgr);
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });

View File

@ -89,6 +89,10 @@ struct entry {
// File system options recorded in the metadata.
struct fs_options {
// file system contains only mtime time stamps
1: required bool mtime_only,
// time base and offsets are stored with this resolution
// 1 = seconds, 60 = minutes, 3600 = hours, ...
// The writer only sets this field for resolutions above 1; the reader
// falls back to a resolution of 1 when it is not set.
2: optional UInt32 time_resolution_sec,
}
struct metadata {