mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 00:40:30 -04:00
Add --time-resolution
option
This commit is contained in:
parent
a629dad243
commit
37244274c4
@ -119,6 +119,13 @@ Most other options are concerned with compression tuning:
|
|||||||
reduce the size of the file system. You can pass either a unix time stamp
|
reduce the size of the file system. You can pass either a unix time stamp
|
||||||
or `now`.
|
or `now`.
|
||||||
|
|
||||||
|
  * `--time-resolution=`*sec*|`sec`|`min`|`hour`|`day`:
    Specify the resolution with which time stamps are stored. By default,
    time stamps are stored with second resolution. You can specify "odd"
    resolutions as well, e.g. something like 15 second resolution is
    entirely possible. Moving from second to minute resolution, for example,
    will save roughly 6 bits per file system entry in the metadata block.
|
||||||
|
|
||||||
* `--keep-all-times`:
|
* `--keep-all-times`:
|
||||||
As of release 0.3.0, by default, `mkdwarfs` will only save the contents of
|
As of release 0.3.0, by default, `mkdwarfs` will only save the contents of
|
||||||
the `mtime` field in order to save metadata space. If you want to save
|
the `mtime` field in order to save metadata space. If you want to save
|
||||||
|
@ -93,6 +93,8 @@ class global_entry_data {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t get_time_offset(uint64_t time) const;
|
||||||
|
|
||||||
std::unordered_map<uint16_t, uint16_t> uids_;
|
std::unordered_map<uint16_t, uint16_t> uids_;
|
||||||
std::unordered_map<uint16_t, uint16_t> gids_;
|
std::unordered_map<uint16_t, uint16_t> gids_;
|
||||||
std::unordered_map<uint16_t, uint16_t> modes_;
|
std::unordered_map<uint16_t, uint16_t> modes_;
|
||||||
|
@ -62,6 +62,7 @@ struct scanner_options {
|
|||||||
std::optional<uint64_t> timestamp;
|
std::optional<uint64_t> timestamp;
|
||||||
bool keep_all_times{false};
|
bool keep_all_times{false};
|
||||||
bool remove_empty_dirs{false};
|
bool remove_empty_dirs{false};
|
||||||
|
uint32_t time_resolution_sec{1};
|
||||||
inode_options inode;
|
inode_options inode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -61,22 +61,27 @@ void global_entry_data::index(std::unordered_map<std::string, uint32_t>& map) {
|
|||||||
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
|
from(map) | get<0>() | order | [&](std::string const& s) { map[s] = ix++; };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t global_entry_data::get_time_offset(uint64_t time) const {
|
||||||
|
return (time - timestamp_base_) / options_.time_resolution_sec;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
|
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
|
||||||
return !options_.timestamp ? time - timestamp_base_ : UINT64_C(0);
|
return !options_.timestamp ? get_time_offset(time) : UINT64_C(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t global_entry_data::get_atime_offset(uint64_t time) const {
|
uint64_t global_entry_data::get_atime_offset(uint64_t time) const {
|
||||||
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
|
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
|
||||||
: UINT64_C(0);
|
: UINT64_C(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
|
uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
|
||||||
return !options_.timestamp && options_.keep_all_times ? time - timestamp_base_
|
return !options_.timestamp && options_.keep_all_times ? get_time_offset(time)
|
||||||
: UINT64_C(0);
|
: UINT64_C(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t global_entry_data::get_timestamp_base() const {
|
uint64_t global_entry_data::get_timestamp_base() const {
|
||||||
return options_.timestamp ? *options_.timestamp : timestamp_base_;
|
return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
|
||||||
|
options_.time_resolution_sec;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t global_entry_data::get_uid_index(uint16_t uid) const {
|
uint16_t global_entry_data::get_uid_index(uint16_t uid) const {
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -626,6 +627,13 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
|
|||||||
auto timebase = meta_.timestamp_base();
|
auto timebase = meta_.timestamp_base();
|
||||||
auto inode = entry.inode();
|
auto inode = entry.inode();
|
||||||
bool mtime_only = meta_.options() && meta_.options()->mtime_only();
|
bool mtime_only = meta_.options() && meta_.options()->mtime_only();
|
||||||
|
uint32_t resolution = 1;
|
||||||
|
if (meta_.options()) {
|
||||||
|
if (auto res = meta_.options()->time_resolution_sec()) {
|
||||||
|
resolution = *res;
|
||||||
|
assert(resolution > 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
stbuf->st_mode = mode;
|
stbuf->st_mode = mode;
|
||||||
|
|
||||||
@ -635,11 +643,11 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
|
|||||||
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
|
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
|
||||||
stbuf->st_uid = entry.getuid();
|
stbuf->st_uid = entry.getuid();
|
||||||
stbuf->st_gid = entry.getgid();
|
stbuf->st_gid = entry.getgid();
|
||||||
stbuf->st_mtime = timebase + entry.mtime_offset();
|
stbuf->st_mtime = resolution * (timebase + entry.mtime_offset());
|
||||||
stbuf->st_atime =
|
stbuf->st_atime = mtime_only ? stbuf->st_mtime
|
||||||
mtime_only ? stbuf->st_mtime : timebase + entry.atime_offset();
|
: resolution * (timebase + entry.atime_offset());
|
||||||
stbuf->st_ctime =
|
stbuf->st_ctime = mtime_only ? stbuf->st_mtime
|
||||||
mtime_only ? stbuf->st_mtime : timebase + entry.ctime_offset();
|
: resolution * (timebase + entry.ctime_offset());
|
||||||
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
|
stbuf->st_nlink = options_.enable_nlink && S_ISREG(mode)
|
||||||
? nlinks_.at(inode - chunk_index_offset_)
|
? nlinks_.at(inode - chunk_index_offset_)
|
||||||
: 1;
|
: 1;
|
||||||
|
@ -562,6 +562,9 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
|
|||||||
|
|
||||||
thrift::metadata::fs_options fsopts;
|
thrift::metadata::fs_options fsopts;
|
||||||
fsopts.mtime_only = !options_.keep_all_times;
|
fsopts.mtime_only = !options_.keep_all_times;
|
||||||
|
if (options_.time_resolution_sec > 1) {
|
||||||
|
fsopts.time_resolution_sec_ref() = options_.time_resolution_sec;
|
||||||
|
}
|
||||||
|
|
||||||
mv2.uids = ge_data.get_uids();
|
mv2.uids = ge_data.get_uids();
|
||||||
mv2.gids = ge_data.get_gids();
|
mv2.gids = ge_data.get_gids();
|
||||||
|
@ -98,6 +98,13 @@ const std::map<std::string, file_order_mode> order_choices{
|
|||||||
{"similarity", file_order_mode::SIMILARITY},
|
{"similarity", file_order_mode::SIMILARITY},
|
||||||
{"nilsimsa", file_order_mode::NILSIMSA}};
|
{"nilsimsa", file_order_mode::NILSIMSA}};
|
||||||
|
|
||||||
|
// Symbolic time resolution names and the number of seconds each one stands
// for.
const std::map<std::string, uint32_t> time_resolutions = {
    {"day", 24 * 60 * 60},
    {"hour", 60 * 60},
    {"min", 60},
    {"sec", 1},
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -281,7 +288,8 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
|
|
||||||
block_manager::config cfg;
|
block_manager::config cfg;
|
||||||
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
||||||
schema_compression, metadata_compression, log_level, timestamp;
|
schema_compression, metadata_compression, log_level, timestamp,
|
||||||
|
time_resolution;
|
||||||
size_t num_workers, max_scanner_workers;
|
size_t num_workers, max_scanner_workers;
|
||||||
bool recompress = false, no_progress = false;
|
bool recompress = false, no_progress = false;
|
||||||
unsigned level;
|
unsigned level;
|
||||||
@ -292,6 +300,10 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
auto order_desc =
|
auto order_desc =
|
||||||
"file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")";
|
"file order (" + (from(order_choices) | get<0>() | unsplit(", ")) + ")";
|
||||||
|
|
||||||
|
auto resolution_desc = "time resolution in seconds or (" +
|
||||||
|
(from(time_resolutions) | get<0>() | unsplit(", ")) +
|
||||||
|
")";
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
po::options_description opts("Command line options");
|
po::options_description opts("Command line options");
|
||||||
opts.add_options()
|
opts.add_options()
|
||||||
@ -340,6 +352,9 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
("keep-all-times",
|
("keep-all-times",
|
||||||
po::value<bool>(&options.keep_all_times)->zero_tokens(),
|
po::value<bool>(&options.keep_all_times)->zero_tokens(),
|
||||||
"save atime and ctime in addition to mtime")
|
"save atime and ctime in addition to mtime")
|
||||||
|
("time-resolution",
|
||||||
|
po::value<std::string>(&time_resolution)->default_value("sec"),
|
||||||
|
resolution_desc.c_str())
|
||||||
("order",
|
("order",
|
||||||
po::value<file_order_mode>(&options.file_order)
|
po::value<file_order_mode>(&options.file_order)
|
||||||
->default_value(file_order_mode::SIMILARITY, "similarity"),
|
->default_value(file_order_mode::SIMILARITY, "similarity"),
|
||||||
@ -541,6 +556,16 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
: folly::to<uint64_t>(timestamp);
|
: folly::to<uint64_t>(timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (auto it = time_resolutions.find(time_resolution);
|
||||||
|
it != time_resolutions.end()) {
|
||||||
|
options.time_resolution_sec = it->second;
|
||||||
|
} else {
|
||||||
|
options.time_resolution_sec = folly::to<uint32_t>(time_resolution);
|
||||||
|
if (options.time_resolution_sec == 0) {
|
||||||
|
throw std::runtime_error("timestamp resolution cannot be 0");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
log_proxy<debug_logger_policy> log(lgr);
|
log_proxy<debug_logger_policy> log(lgr);
|
||||||
|
|
||||||
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });
|
progress prog([&](const progress& p, bool last) { lgr.update(p, last); });
|
||||||
|
@ -89,6 +89,10 @@ struct entry {
|
|||||||
struct fs_options {
|
struct fs_options {
|
||||||
// file system contains only mtime time stamps
|
// file system contains only mtime time stamps
|
||||||
1: required bool mtime_only,
|
1: required bool mtime_only,
|
||||||
|
|
||||||
|
// time base and offsets are stored with this resolution
|
||||||
|
// 1 = seconds, 60 = minutes, 3600 = hours, ...
|
||||||
|
2: optional UInt32 time_resolution_sec,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct metadata {
|
struct metadata {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user