Allow specifying a filesystem image offset (fixes GitHub issue #38)

Works for dwarfs, dwarfsck and dwarfsextract
This commit is contained in:
Marcus Holland-Moritz 2021-03-28 00:27:06 +01:00
parent b4a8e0134b
commit aa92e69d10
6 changed files with 145 additions and 31 deletions

View File

@ -65,7 +65,7 @@ class filesystem_v2 {
static int identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
int detail_level = 0, size_t num_readers = 1,
bool check_integrity = false);
bool check_integrity = false, off_t image_offset = 0);
void dump(std::ostream& os, int detail_level) const {
impl_->dump(os, detail_level);

View File

@ -25,6 +25,8 @@
#include <iosfwd>
#include <optional>
#include <sys/types.h>
namespace dwarfs {
enum class mlock_mode { NONE, TRY, MUST };
@ -42,7 +44,10 @@ struct metadata_options {
};
// Options used when opening a filesystem image.
struct filesystem_options {
  // Sentinel value for `image_offset` requesting that the start of the
  // filesystem image be located automatically instead of being given
  // explicitly.
  static constexpr off_t IMAGE_OFFSET_AUTO = -1;

  // Memory locking mode; no locking by default.
  mlock_mode lock_mode = mlock_mode::NONE;
  // Offset in bytes of the filesystem image within the input file, or
  // IMAGE_OFFSET_AUTO.
  off_t image_offset = 0;
  // Settings for the block cache.
  block_cache_options block_cache;
  // Settings for metadata handling.
  metadata_options metadata;
};

View File

@ -59,6 +59,7 @@ struct options {
const char* workers_str; // TODO: const?? -> use string?
const char* mlock_str; // TODO: const?? -> use string?
const char* decompress_ratio_str; // TODO: const?? -> use string?
const char* image_offset_str; // TODO: const?? -> use string?
int enable_nlink;
int readonly;
int cache_image;
@ -82,6 +83,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
DWARFS_OPT("workers=%s", workers_str, 0),
DWARFS_OPT("mlock=%s", mlock_str, 0),
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
DWARFS_OPT("offset=%s", image_offset_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1),
DWARFS_OPT("readonly", readonly, 1),
DWARFS_OPT("cache_image", cache_image, 1),
@ -112,6 +114,19 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
fsopts.block_cache.mm_release = !s_opts.cache_image;
fsopts.metadata.enable_nlink = bool(s_opts.enable_nlink);
fsopts.metadata.readonly = bool(s_opts.readonly);
if (s_opts.image_offset_str) {
std::string image_offset{s_opts.image_offset_str};
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
}
s_fs = std::make_shared<filesystem_v2>(
s_lgr, std::make_shared<mmap>(s_opts.fsimage), fsopts, FUSE_ROOT_ID);
@ -426,20 +441,22 @@ void op_statfs(fuse_req_t req, fuse_ino_t /*ino*/) {
}
void usage(const char* progname) {
std::cerr << "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION
<< ")\n\n"
<< "usage: " << progname << " image mountpoint [options]\n\n"
<< "DWARFS options:\n"
<< " -o cachesize=SIZE set size of block cache (512M)\n"
<< " -o workers=NUM number of worker threads (2)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o enable_nlink show correct hardlink numbers\n"
<< " -o readonly show read-only file system\n"
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n"
<< std::endl;
std::cerr
<< "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION
<< ")\n\n"
<< "usage: " << progname << " image mountpoint [options]\n\n"
<< "DWARFS options:\n"
<< " -o cachesize=SIZE set size of block cache (512M)\n"
<< " -o workers=NUM number of worker threads (2)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o offset=NUM|auto filesystem image offset in bytes (0)\n"
<< " -o enable_nlink show correct hardlink numbers\n"
<< " -o readonly show read-only file system\n"
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n"
<< std::endl;
#if FUSE_USE_VERSION >= 30
fuse_cmdline_help();

View File

@ -55,13 +55,71 @@ namespace {
class filesystem_parser {
public:
explicit filesystem_parser(std::shared_ptr<mmif> mm)
: mm_(mm) {
if (mm_->size() < sizeof(file_header)) {
// Determine the offset of the filesystem image inside the mapping `mm`.
//
// If `image_offset` is anything other than
// filesystem_options::IMAGE_OFFSET_AUTO it is returned unchanged. Otherwise
// the mapping is scanned for the magic bytes "DWARFS" followed by
// MAJOR_VERSION, and the absolute offset of the first plausible match is
// returned:
//   - for headers with `minor < 2` the magic match alone is accepted;
//   - otherwise the candidate must be a section with number 0 that is
//     directly followed (after its payload of `length` bytes) by another
//     magic-prefixed section header with number 1.
//
// Throws runtime_error("no filesystem found") via DWARFS_THROW if no
// candidate passes validation.
static off_t find_image_offset(mmif& mm, off_t image_offset) {
  if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
    return image_offset;
  }

  static constexpr std::array<char, 7> magic{
      {'D', 'W', 'A', 'R', 'F', 'S', MAJOR_VERSION}};

  auto const size = static_cast<off_t>(mm.size());
  off_t start = 0;

  // Keep scanning while there is still room for a complete magic sequence.
  while (size - start >= static_cast<off_t>(magic.size())) {
    auto ps = mm.as<void>(start);

    // Only the bytes from `start` to the end of the mapping may be searched;
    // passing the full mapping size here would read past the end.
    auto pc = ::memmem(ps, static_cast<size_t>(size - start), magic.data(),
                       magic.size());

    if (!pc) {
      break;
    }

    // `pc - ps` is relative to `start`; convert it to an absolute offset,
    // since everything below (and the caller) works with absolute offsets.
    off_t const pos = start + (static_cast<uint8_t const*>(pc) -
                               static_cast<uint8_t const*>(ps));
    off_t const remaining = size - pos;

    if (remaining <= static_cast<off_t>(sizeof(file_header))) {
      break;
    }

    auto fh = mm.as<file_header>(pos);

    if (fh->minor < 2) {
      // best we can do for older file systems
      return pos;
    }

    // do a little more validation before we return
    if (remaining <= static_cast<off_t>(sizeof(section_header_v2))) {
      break;
    }

    auto sh = mm.as<section_header_v2>(pos);

    if (sh->number == 0) {
      constexpr off_t two_headers = 2 * sizeof(section_header_v2);

      // Compare against the remaining space instead of summing offsets so
      // that a bogus (huge) `length` cannot wrap around and pass the check.
      // NOTE(review): assumes `length` is an unsigned integer - confirm.
      if (remaining <= two_headers ||
          static_cast<uint64_t>(sh->length) >=
              static_cast<uint64_t>(remaining - two_headers)) {
        break;
      }

      auto next = mm.as<void>(pos + sizeof(section_header_v2) + sh->length);

      if (::memcmp(next, magic.data(), magic.size()) == 0 and
          reinterpret_cast<section_header_v2 const*>(next)->number == 1) {
        return pos;
      }
    }

    // Not a valid image start; resume the search just past this match.
    start = pos + static_cast<off_t>(magic.size());
  }

  DWARFS_THROW(runtime_error, "no filesystem found");
}
explicit filesystem_parser(std::shared_ptr<mmif> mm, off_t image_offset = 0)
: mm_{mm}
, image_offset_{find_image_offset(*mm_, image_offset)} {
if (mm_->size() < image_offset_ + sizeof(file_header)) {
DWARFS_THROW(runtime_error, "file too small");
}
auto fh = mm_->as<file_header>();
auto fh = mm_->as<file_header>(image_offset_);
if (::memcmp(&fh->magic[0], "DWARFS", 6) != 0) {
DWARFS_THROW(runtime_error, "magic not found");
@ -83,7 +141,7 @@ class filesystem_parser {
}
std::optional<fs_section> next_section() {
if (offset_ < mm_->size()) {
if (offset_ < static_cast<off_t>(mm_->size())) {
auto section = fs_section(*mm_, offset_, version_);
offset_ = section.end();
return section;
@ -92,15 +150,20 @@ class filesystem_parser {
return std::nullopt;
}
void rewind() { offset_ = version_ == 1 ? sizeof(file_header) : 0; }
// Reset the section cursor to the first section of the image. For
// version 1 the cursor is placed sizeof(file_header) bytes past the image
// offset; for all other versions it is placed at the image offset itself.
void rewind() {
  off_t const header_skip = version_ == 1 ? sizeof(file_header) : 0;
  offset_ = image_offset_ + header_skip;
}
// Human-readable version string of the form "<major>.<minor> [<version>]".
std::string version() const {
  auto text = fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
  return text;
}
// Offset of the filesystem image within the mapping, as stored at
// construction time.
off_t image_offset() const {
  return image_offset_;
}
private:
std::shared_ptr<mmif> mm_;
size_t offset_{0};
off_t const image_offset_;
off_t offset_{0};
int version_{0};
uint8_t major_{0};
uint8_t minor_{0};
@ -218,7 +281,7 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
int inode_offset)
: LOG_PROXY_INIT(lgr)
, mm_(std::move(mm)) {
filesystem_parser parser(mm_);
filesystem_parser parser(mm_, options.image_offset);
block_cache cache(lgr, mm_, options.block_cache);
section_map sections;
@ -475,13 +538,18 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
std::ostream& os, int detail_level,
size_t num_readers, bool check_integrity) {
size_t num_readers, bool check_integrity,
off_t image_offset) {
// TODO:
LOG_PROXY(debug_logger_policy, lgr);
filesystem_parser parser(mm);
filesystem_parser parser(mm, image_offset);
if (detail_level > 0) {
os << "FILESYSTEM version " << parser.version() << std::endl;
os << "DwarFS version " << parser.version();
if (auto off = parser.image_offset(); off > 0) {
os << " at offset " << off;
}
os << std::endl;
}
worker_group wg("reader", num_readers);
@ -540,6 +608,7 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
if (detail_level > 0) {
fsopts.metadata.enable_nlink = true;
}
fsopts.image_offset = image_offset;
filesystem_v2(lgr, mm, fsopts).dump(os, detail_level);
}

View File

@ -43,7 +43,7 @@ namespace po = boost::program_options;
int dwarfsck(int argc, char** argv) {
const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u);
std::string log_level, input, export_metadata;
std::string log_level, input, export_metadata, image_offset;
size_t num_workers;
int detail;
bool json = false;
@ -58,6 +58,9 @@ int dwarfsck(int argc, char** argv) {
("detail,d",
po::value<int>(&detail)->default_value(2),
"detail level")
("image-offset,O",
po::value<std::string>(&image_offset)->default_value("auto"),
"filesystem image offset in bytes")
("num-workers,n",
po::value<size_t>(&num_workers)->default_value(num_cpu),
"number of reader worker threads")
@ -101,22 +104,31 @@ int dwarfsck(int argc, char** argv) {
LOG_PROXY(debug_logger_policy, lgr);
try {
filesystem_options fsopts;
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
auto mm = std::make_shared<mmap>(input);
if (!export_metadata.empty()) {
auto of = folly::File(export_metadata, O_RDWR | O_CREAT | O_TRUNC);
filesystem_v2 fs(lgr, mm);
filesystem_v2 fs(lgr, mm, fsopts);
auto json = fs.serialize_metadata_as_json(true);
if (folly::writeFull(of.fd(), json.data(), json.size()) < 0) {
LOG_ERROR << "failed to export metadata";
}
of.close();
} else if (json) {
filesystem_v2 fs(lgr, mm);
filesystem_v2 fs(lgr, mm, fsopts);
std::cout << folly::toPrettyJson(fs.metadata_as_dynamic()) << std::endl;
} else {
filesystem_v2::identify(lgr, mm, std::cout, detail, num_workers,
check_integrity);
check_integrity, fsopts.image_offset);
}
} catch (system_error const& e) {
LOG_ERROR << folly::exceptionStr(e);

View File

@ -42,7 +42,8 @@ using namespace dwarfs;
namespace {
int dwarfsextract(int argc, char** argv) {
std::string filesystem, output, format, cache_size_str, log_level;
std::string filesystem, output, format, cache_size_str, log_level,
image_offset;
size_t num_workers;
// clang-format off
@ -54,6 +55,9 @@ int dwarfsextract(int argc, char** argv) {
("output,o",
po::value<std::string>(&output),
"output file or directory")
("image-offset,O",
po::value<std::string>(&image_offset)->default_value("auto"),
"filesystem image offset in bytes")
("format,f",
po::value<std::string>(&format),
"output format")
@ -89,6 +93,13 @@ int dwarfsextract(int argc, char** argv) {
try {
stream_logger lgr(std::cerr, logger::parse_level(log_level));
filesystem_options fsopts;
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
fsopts.block_cache.num_workers = num_workers;