mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-17 08:18:07 -04:00
Add --time-resolution
option
This commit is contained in:
parent
a629dad243
commit
37244274c4
@ -119,6 +119,13 @@ Most other options are concerned with compression tuning:
|
||||
reduce the size of the file system. You can pass either a unix time stamp
|
||||
or `now`.
|
||||
|
||||
* `--time-resolution=`*seconds*|`sec`|`min`|`hour`|`day`:
|
||||
Specify the resolution with which time stamps are stored. By default,
|
||||
time stamps are stored with second resolution. You can specify "odd"
|
||||
resolutions as well, e.g. a 15-second resolution is
|
||||
entirely possible. Moving from second to minute resolution, for example,
|
||||
will save roughly 6 bits per file system entry in the metadata block.
|
||||
|
||||
* `--keep-all-times`:
|
||||
As of release 0.3.0, by default, `mkdwarfs` will only save the contents of
|
||||
the `mtime` field in order to save metadata space. If you want to save
|
||||
|
@ -93,6 +93,8 @@ class global_entry_data {
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t get_time_offset(uint64_t time) const;
|
||||
|
||||
std::unordered_map<uint16_t, uint16_t> uids_;
|
||||
std::unordered_map<uint16_t, uint16_t> gids_;
|
||||
std::unordered_map<uint16_t, uint16_t> modes_;
|
||||
|
@ -62,6 +62,7 @@ struct scanner_options {
|
||||
std::optional<uint64_t> timestamp;
|
||||
bool keep_all_times{false};
|
||||
bool remove_empty_dirs{false};
|
||||
uint32_t time_resolution_sec{1};
|
||||
inode_options inode;
|
||||
};
|
||||
|
||||
|
@ -61,22 +61,27 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
|
||||
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
|
||||
}
|
||||
|
||||
// Convert an absolute time stamp into an offset relative to timestamp_base_,
// expressed in units of options_.time_resolution_sec. The division is an
// integer division, so any remainder below the configured resolution is
// dropped.
// NOTE(review): the subtraction is done on an unsigned value; this presumably
// relies on time >= timestamp_base_ -- confirm against the callers.
uint64_t global_entry_data::get_time_offset(uint64_t time) const {
|
||||
return (time - timestamp_base_) / options_.time_resolution_sec;
|
||||
}
|
||||
|
||||
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
|
||||
return !options_.timestamp ? time - timestamp_base_ : UINT64_C(0);
|
||||
return !options_.timestamp ? get_time_offset(time) : UINT64_C(0);
|
||||
}
|
||||
|
||||
uint64_t global_entry_data::get_atime_offset(uint64_t time) const {
|
||||
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
|
||||
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
|
||||
: UINT64_C(0);
|
||||
}
|
||||
|
||||
uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
|
||||
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
|
||||
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
|
||||
: UINT64_C(0);
|
||||
}
|
||||
|
||||
uint64_t global_entry_data::get_timestamp_base() const {
|
||||
return options_.timestamp ? *options_.timestamp : timestamp_base_;
|
||||
return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
|
||||
options_.time_resolution_sec;
|
||||
}
|
||||
|
||||
uint16_t global_entry_data::get_uid_index(uint16_t uid) const {
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
@ -626,6 +627,13 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
|
||||
auto timebase = meta_.timestamp_base();
|
||||
auto inode = entry.inode();
|
||||
bool mtime_only = meta_.options() && meta_.options()->mtime_only();
|
||||
uint32_t resolution = 1;
|
||||
if (meta_.options()) {
|
||||
if (auto res = meta_.options()->time_resolution_sec()) {
|
||||
resolution = *res;
|
||||
assert(resolution > 0);
|
||||
}
|
||||
}
|
||||
|
||||
stbuf->st_mode = mode;
|
||||
|
||||
@ -635,11 +643,11 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
|
||||
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
|
||||
stbuf->st_uid = entry.getuid();
|
||||
stbuf->st_gid = entry.getgid();
|
||||
stbuf->st_mtime = timebase + entry.mtime_offset();
|
||||
stbuf->st_atime =
|
||||
mtime_only ? stbuf->st_mtime : timebase + entry.atime_offset();
|
||||
stbuf->st_ctime =
|
||||
mtime_only ? stbuf->st_mtime : timebase + entry.ctime_offset();
|
||||
stbuf->st_mtime = resolution * (timebase + entry.mtime_offset());
|
||||
stbuf->st_atime = mtime_only ? stbuf->st_mtime
|
||||
: resolution * (timebase + entry.atime_offset());
|
||||
stbuf->st_ctime = mtime_only ? stbuf->st_mtime
|
||||
: resolution * (timebase + entry.ctime_offset());
|
||||
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
|
||||
? nlinks_.at(inode - chunk_index_offset_)
|
||||
: 1;
|
||||
|
@ -562,6 +562,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
||||
|
||||
thrift::metadata::fs_options fsopts;
|
||||
fsopts.mtime_only = !options_.keep_all_times;
|
||||
if (options_.time_resolution_sec > 1) {
|
||||
fsopts.time_resolution_sec_ref() = options_.time_resolution_sec;
|
||||
}
|
||||
|
||||
mv2.uids = ge_data.get_uids();
|
||||
mv2.gids = ge_data.get_gids();
|
||||
|
@ -98,6 +98,13 @@ const std::map<std::string, file_order_mode> order_choices{
|
||||
{"similarity", file_order_mode::SIMILARITY},
|
||||
{"nilsimsa", file_order_mode::NILSIMSA}};
|
||||
|
||||
// Symbolic names accepted by the --time-resolution option, mapped to the
// equivalent resolution in seconds. A value that is not found in this table
// is parsed as a plain number of seconds by the option handling in mkdwarfs().
const std::map<std::string, uint32_t> time_resolutions{
|
||||
{"sec", 1},
|
||||
{"min", 60},
|
||||
{"hour", 3600},
|
||||
{"day", 86400},
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace dwarfs {
|
||||
@ -281,7 +288,8 @@ int mkdwarfs(int argc, char** argv) {
|
||||
|
||||
block_manager::config cfg;
|
||||
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
||||
schema_compression, metadata_compression, log_level, timestamp;
|
||||
schema_compression, metadata_compression, log_level, timestamp,
|
||||
time_resolution;
|
||||
size_t num_workers, max_scanner_workers;
|
||||
bool recompress = false, no_progress = false;
|
||||
unsigned level;
|
||||
@ -292,6 +300,10 @@ int mkdwarfs(int argc, char** argv) {
|
||||
auto order_desc =
|
||||
"file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")";
|
||||
|
||||
auto resolution_desc = "time resolution in seconds or (" +
|
||||
(from(time_resolutions) | get<0>() | unsplit(", ")) +
|
||||
")";
|
||||
|
||||
// clang-format off
|
||||
po::options_description opts("Command line options");
|
||||
opts.add_options()
|
||||
@ -340,6 +352,9 @@ int mkdwarfs(int argc, char** argv) {
|
||||
("keep-all-times",
|
||||
po::value<bool>(&options.keep_all_times)->zero_tokens(),
|
||||
"save atime and ctime in addition to mtime")
|
||||
("time-resolution",
|
||||
po::value<std::string>(&time_resolution)->default_value("sec"),
|
||||
resolution_desc.c_str())
|
||||
("order",
|
||||
po::value<file_order_mode>(&options.file_order)
|
||||
->default_value(file_order_mode::SIMILARITY, "similarity"),
|
||||
@ -541,6 +556,16 @@ int mkdwarfs(int argc, char** argv) {
|
||||
: folly::to<uint64_t>(timestamp);
|
||||
}
|
||||
|
||||
if (auto it = time_resolutions.find(time_resolution);
|
||||
it != time_resolutions.end()) {
|
||||
options.time_resolution_sec = it->second;
|
||||
} else {
|
||||
options.time_resolution_sec = folly::to<uint32_t>(time_resolution);
|
||||
if (options.time_resolution_sec == 0) {
|
||||
throw std::runtime_error("timestamp resolution cannot be 0");
|
||||
}
|
||||
}
|
||||
|
||||
log_proxy<debug_logger_policy> log(lgr);
|
||||
|
||||
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });
|
||||
|
@ -89,6 +89,10 @@ struct entry {
|
||||
// Options describing how the time stamps of a file system were stored.
struct fs_options {
|
||||
// file system contains only mtime time stamps
|
||||
1: required bool mtime_only,
|
||||
|
||||
// time base and offsets are stored with this resolution
|
||||
// 1 = seconds, 60 = minutes, 3600 = hours, ...
|
||||
// The scanner only sets this field when the resolution is greater than 1;
// when it is absent, getattr() falls back to a resolution of 1 second.
2: optional UInt32 time_resolution_sec,
|
||||
}
|
||||
|
||||
struct metadata {
|
||||
|
Loading…
x
Reference in New Issue
Block a user